<a href="https://colab.research.google.com/github/KalusaniLaxman/ADM/blob/main/Untitled37.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [28]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from datetime import datetime

In [15]:
# 1. Data Encoding
# Toy categorical dataset with ten rows: both columns cycle through their
# three levels three times and then restart once, so 'A' and 'X' each appear
# four times while the other levels appear three times.
data = {
    'Category': ['A', 'B', 'C'] * 3 + ['A'],
    'Subcategory': ['X', 'Y', 'Z'] * 3 + ['X'],
}

In [16]:
# Materialise the raw dictionary as a DataFrame: one column per key, one row
# per list position.
df = pd.DataFrame.from_dict(data)


In [17]:
# Label Encoding
# Fit a dedicated encoder per column. Re-fitting a single shared LabelEncoder
# replaces its learned classes, so only the last column's mapping would
# survive and the 'Category' codes could no longer be inverse-transformed.
category_encoder = LabelEncoder()
df['Category_Label'] = category_encoder.fit_transform(df['Category'])

subcategory_encoder = LabelEncoder()
df['Subcategory_Label'] = subcategory_encoder.fit_transform(df['Subcategory'])

# Keep the original name bound exactly as before: after this cell it referred
# to the encoder fitted on 'Subcategory'.
label_encoder = subcategory_encoder

In [18]:
# One-hot Encoding
# Expand both categorical columns into indicator columns named
# '<prefix>_<level>' (e.g. 'Category_A', 'Subcategory_X').
one_hot_encoded = pd.get_dummies(df[['Category', 'Subcategory']], prefix=['Category', 'Subcategory'])

# Drop any indicator columns left over from a previous run of this cell before
# concatenating; otherwise re-running the cell would append a second copy of
# every indicator column. On a first run nothing is dropped.
df = pd.concat(
    [df.drop(columns=one_hot_encoded.columns, errors='ignore'), one_hot_encoded],
    axis=1,
)

In [19]:
# Show the frame after both encodings. The header and the frame are passed as
# separate arguments, so print() joins them with a single space.
print("Data Encoding Output:\n", df, sep=" ")

Data Encoding Output:
   Category Subcategory  Category_Label  Subcategory_Label  Category_A  \
0        A           X               0                  0        True   
1        B           Y               1                  1       False   
2        C           Z               2                  2       False   
3        A           X               0                  0        True   
4        B           Y               1                  1       False   
5        C           Z               2                  2       False   
6        A           X               0                  0        True   
7        B           Y               1                  1       False   
8        C           Z               2                  2       False   
9        A           X               0                  0        True   

   Category_B  Category_C  Subcategory_X  Subcategory_Y  Subcategory_Z  
0       False       False           True          False          False  
1        True       False  

In [20]:
# 2. Creating Derived Columns
# The sample is written as (name, date-of-birth) records and transposed into
# the column-oriented dict that DataFrame expects.
data = dict(zip(
    ('Name', 'Date_of_Birth'),
    map(list, zip(
        ('Alice', '1990-05-14'),
        ('Bob', '1985-08-23'),
        ('Charlie', '1992-11-30'),
        ('David', '1988-04-15'),
        ('Eve', '1995-07-19'),
        ('Frank', '1983-01-10'),
        ('Grace', '1997-06-25'),
        ('Helen', '1980-12-03'),
        ('Ian', '1993-03-29'),
        ('Jack', '1991-09-17'),
    )),
))
df = pd.DataFrame(data)

In [21]:
# Convert Date_of_Birth to datetime
# The column starts out as 'YYYY-MM-DD' strings; parsing it makes the .dt
# accessor available for the age calculation.
df['Date_of_Birth'] = df['Date_of_Birth'].pipe(pd.to_datetime)

In [22]:
# Calculate Age
# Age in completed years. A plain "current year - birth year" over-counts by
# one for anyone whose birthday has not yet occurred this year, so subtract 1
# for those rows.
today = datetime.now()
current_year = today.year
birth_dates = df['Date_of_Birth']
# True where this year's birthday is still in the future.
birthday_pending = (birth_dates.dt.month > today.month) | (
    (birth_dates.dt.month == today.month) & (birth_dates.dt.day > today.day)
)
df['Age'] = current_year - birth_dates.dt.year - birthday_pending.astype(int)

In [24]:
# Calculate Age
# Age in completed years. A plain "current year - birth year" over-counts by
# one for anyone whose birthday has not yet occurred this year, so subtract 1
# for those rows.
today = datetime.now()
current_year = today.year
birth_dates = df['Date_of_Birth']
# True where this year's birthday is still in the future.
birthday_pending = (birth_dates.dt.month > today.month) | (
    (birth_dates.dt.month == today.month) & (birth_dates.dt.day > today.day)
)
df['Age'] = current_year - birth_dates.dt.year - birthday_pending.astype(int)
print("\nDerived Columns Output:\n", df)



Derived Columns Output:
       Name Date_of_Birth  Age
0    Alice    1990-05-14   35
1      Bob    1985-08-23   40
2  Charlie    1992-11-30   33
3    David    1988-04-15   37
4      Eve    1995-07-19   30
5    Frank    1983-01-10   42
6    Grace    1997-06-25   28
7    Helen    1980-12-03   45
8      Ian    1993-03-29   32
9     Jack    1991-09-17   34


In [25]:
# 3. Grouping and Aggregating Data
# The sample is written as (category, subcategory, value) records and
# transposed into the column-oriented dict that DataFrame expects.
data = dict(zip(
    ('Category', 'Subcategory', 'Value'),
    map(list, zip(
        ('A', 'X', 10),
        ('A', 'Y', 20),
        ('B', 'Z', 10),
        ('B', 'X', 30),
        ('C', 'Y', 40),
        ('A', 'X', 15),
        ('B', 'Z', 25),
        ('C', 'Y', 35),
        ('C', 'Z', 45),
        ('A', 'X', 50),
    )),
))
df = pd.DataFrame(data)

In [26]:
# Group by and aggregate
# One result row per (Category, Subcategory) pair that occurs in the data,
# with the total, average and row count of 'Value' as columns.
grouped = (
    df
    .groupby(by=['Category', 'Subcategory'])
    .aggregate({'Value': ['sum', 'mean', 'count']})
)
print("\nGrouped Data:\n", grouped)


Grouped Data:
                      Value            
                       sum  mean count
Category Subcategory                  
A        X              75  25.0     3
         Y              20  20.0     1
B        X              30  30.0     1
         Z              35  17.5     2
C        Y              75  37.5     2
         Z              45  45.0     1


In [27]:
# Pivot table
# Categories become rows and subcategories become columns, once per aggregate
# (sum, then mean). Combinations absent from the data show up as NaN.
pivot = pd.pivot_table(
    df,
    values='Value',
    index='Category',
    columns='Subcategory',
    aggfunc=['sum', 'mean'],
)
print("\nPivot Table:\n", pivot)


Pivot Table:
               sum              mean            
Subcategory     X     Y     Z     X     Y     Z
Category                                       
A            75.0  20.0   NaN  25.0  20.0   NaN
B            30.0   NaN  35.0  30.0   NaN  17.5
C             NaN  75.0  45.0   NaN  37.5  45.0


In [29]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from datetime import datetime

# Employee sample: one row per employee.
data = {
    'Employee_ID': [101, 102, 103, 104, 105],
    'Department': ['HR', 'IT', 'Finance', 'Sales', 'IT'],
    'Joining_Date': ['2015-06-15', '2018-09-10', '2012-04-23', '2020-11-30', '2016-08-25'],
    'Salary': [50000, 70000, 65000, 45000, 72000]
}
df = pd.DataFrame(data)

# Parse the joining dates so the .dt accessor can be used below.
df['Joining_Date'] = pd.to_datetime(df['Joining_Date'])

# Experience in completed years. A plain "current year - joining year"
# over-counts by one for anyone whose joining anniversary has not yet occurred
# this year, so subtract 1 for those rows.
today = datetime.now()
current_year = today.year
joining_dates = df['Joining_Date']
# True where this year's anniversary is still in the future.
anniversary_pending = (joining_dates.dt.month > today.month) | (
    (joining_dates.dt.month == today.month) & (joining_dates.dt.day > today.day)
)
df['Years_Experience'] = current_year - joining_dates.dt.year - anniversary_pending.astype(int)

# Integer code per department.
label_encoder = LabelEncoder()
df['Label_Encoded'] = label_encoder.fit_transform(df['Department'])

# One indicator column per department, named 'Dept_<department>'.
df = pd.concat([df, pd.get_dummies(df['Department'], prefix='Dept')], axis=1)

# Per-department salary total/average and average experience.
grouped = df.groupby('Department').agg({'Salary': ['sum', 'mean'], 'Years_Experience': ['mean']})

# Per-department sum and mean of both numeric columns.
pivot = df.pivot_table(index='Department', values=['Salary', 'Years_Experience'], aggfunc=['sum', 'mean'])

print("Transformed Data:")
print(df)
print("\n\n\nGrouped Data:")
print(grouped)
print("\n\n\nPivot Table:")
print(pivot)


Transformed Data:
   Employee_ID Department Joining_Date  Salary  Years_Experience  \
0          101         HR   2015-06-15   50000                10   
1          102         IT   2018-09-10   70000                 7   
2          103    Finance   2012-04-23   65000                13   
3          104      Sales   2020-11-30   45000                 5   
4          105         IT   2016-08-25   72000                 9   

   Label_Encoded  Dept_Finance  Dept_HR  Dept_IT  Dept_Sales  
0              1         False     True    False       False  
1              2         False    False     True       False  
2              0          True    False    False       False  
3              3         False    False    False        True  
4              2         False    False     True       False  



Grouped Data:
            Salary          Years_Experience
               sum     mean             mean
Department                                  
Finance      65000  65000.0             13.