In [None]:
import pandas as pd
import numpy as np

In [None]:
# Raw sample records, stored column-wise: each key is a column name and each
# list holds that column's values (six rows in total).
data = {
    'ID': [1, 2, 3, 4, 5, 6],
    'Name': ['Nehali', 'Nakshatra', 'Tanya', 'Omkar', 'Darshil', 'Jayesh'],
    'Age': [17, 21, 20, 22, 16, 15],
    'Country': ['France', 'France', 'USA', 'India', 'Canada', 'Scotland'],
    'Sales': [200, 450, 300, 800, 150, 400],
}

In [None]:
# Build the working DataFrame from the column-wise `data` dict; the dict keys
# become the column names.
df = pd.DataFrame(data=data)
print("Original DataFrame")
# Bare last expression so the notebook renders the frame as a table.
df

Original DataFrame


Unnamed: 0,ID,Name,Age,Country,Sales
0,1,Nehali,17,France,200
1,2,Nakshatra,21,France,450
2,3,Tanya,20,USA,300
3,4,Omkar,22,India,800
4,5,Darshil,16,Canada,150
5,6,Jayesh,15,Scotland,400


In [None]:
# 1. Data Flow Transformation -- Character Map
# A character map rewrites text data by changing the case of its characters.
# Here the Name column is upper-cased into a new Name_upper column (added to
# df itself), and the original and converted names are shown side by side.
df['Name_upper'] = df['Name'].str.upper()
df.loc[:, ['ID', 'Name', 'Name_upper']]

Unnamed: 0,ID,Name,Name_upper
0,1,Nehali,NEHALI
1,2,Nakshatra,NAKSHATRA
2,3,Tanya,TANYA
3,4,Omkar,OMKAR
4,5,Darshil,DARSHIL
5,6,Jayesh,JAYESH


In [None]:
# 2. Multicast: fan the same dataset out into two separate copies
df_copy1 = df.copy()
df_copy2 = df.copy()

# Apply a different transformation to each copy
df_copy1['Sales'] = df_copy1['Sales'].mul(1.1)  # raise sales by 10%
df_copy2['Age'] = df_copy2['Age'].add(5)        # add 5 years to every age



In [None]:
# Show the first multicast copy (the one whose Sales column was multiplied by 1.1).
df_copy1

Unnamed: 0,ID,Name,Age,Country,Sales,Name_upper
0,1,Nehali,17,France,220.0,NEHALI
1,2,Nakshatra,21,France,495.0,NAKSHATRA
2,3,Tanya,20,USA,330.0,TANYA
3,4,Omkar,22,India,880.0,OMKAR
4,5,Darshil,16,Canada,165.0,DARSHIL
5,6,Jayesh,15,Scotland,440.0,JAYESH


In [None]:
# Show the second multicast copy (the one whose Age column was increased by 5).
df_copy2

Unnamed: 0,ID,Name,Age,Country,Sales,Name_upper
0,1,Nehali,22,France,200,NEHALI
1,2,Nakshatra,26,France,450,NAKSHATRA
2,3,Tanya,25,USA,300,TANYA
3,4,Omkar,27,India,800,OMKAR
4,5,Darshil,21,Canada,150,DARSHIL
5,6,Jayesh,20,Scotland,400,JAYESH


In [None]:
# 3. Conditional split: route each row into one of two frames by its Sales value.
# Rows with Sales strictly above 300 go to high_sales; rows at or below 300 go
# to low_sales.
high_sales = df.loc[df['Sales'].gt(300)]
low_sales = df.loc[df['Sales'].le(300)]

In [None]:
# Show the rows of df whose Sales are greater than 300.
high_sales

Unnamed: 0,ID,Name,Age,Country,Sales,Name_upper
1,2,Nakshatra,21,France,450,NAKSHATRA
3,4,Omkar,22,India,800,OMKAR
5,6,Jayesh,15,Scotland,400,JAYESH


In [None]:
# Show the rows of df whose Sales are 300 or less.
low_sales

Unnamed: 0,ID,Name,Age,Country,Sales,Name_upper
0,1,Nehali,17,France,200,NEHALI
2,3,Tanya,20,USA,300,TANYA
4,5,Darshil,16,Canada,150,DARSHIL


In [None]:
# 4. Aggregation
# Collapse the rows to one per Country and total the Sales within each group.
# as_index=False keeps Country as an ordinary column rather than the index,
# so the result is a two-column frame: Country, Sales.
agg_df = df.groupby('Country', as_index=False)['Sales'].sum()
print("\nAggregation (Total Sales by Country):")
print(agg_df)


Aggregation (Total Sales by Country):
    Country  Sales
0    Canada    150
1    France    650
2     India    800
3  Scotland    400
4       USA    300


In [None]:
# 5. Sort
# Order the rows from highest to lowest Sales. The result is a new frame
# (sorted_df) that keeps the original row labels.
sorted_df = df.sort_values('Sales', ascending=False)
print("\nSort (Descending Sales):")
print(sorted_df)


Sort (Descending Sales):
   ID       Name  Age   Country  Sales Name_upper
3   4      Omkar   22     India    800      OMKAR
1   2  Nakshatra   21    France    450  NAKSHATRA
5   6     Jayesh   15  Scotland    400     JAYESH
2   3      Tanya   20       USA    300      TANYA
0   1     Nehali   17    France    200     NEHALI
4   5    Darshil   16    Canada    150    DARSHIL


In [None]:
# 6. Derived Column: label each row's sales as 'High' or 'Low'.
# Sales strictly above 300 are 'High'; everything else (including exactly 300)
# is 'Low' -- the same cut-off the conditional split uses.
# np.where evaluates the comparison for the whole column at once instead of
# calling a Python lambda once per row.
df['Sales_Category'] = np.where(df['Sales'] > 300, 'High', 'Low')
print("\nDerived Column (Sales Category)")
# Bare last expression so the notebook renders the updated frame as a table.
df


Derived Column (Sales Category)


Unnamed: 0,ID,Name,Age,Country,Sales,Name_upper,Sales_Category
0,1,Nehali,17,France,200,NEHALI,Low
1,2,Nakshatra,21,France,450,NAKSHATRA,High
2,3,Tanya,20,USA,300,TANYA,Low
3,4,Omkar,22,India,800,OMKAR,High
4,5,Darshil,16,Canada,150,DARSHIL,Low
5,6,Jayesh,15,Scotland,400,JAYESH,High
