# Creating New Columns from Existing Columns

In [3]:
# New columns can be derived from existing data, which helps to extract additional insights
# or to transform the dataset for better analysis.

In [16]:
import pandas as pd

In [30]:
#1. Basic Arithmetic Operations

In [32]:
# Sample employee records, stored column-wise: each key is a column name
# and each list holds one value per person (five people in total).
data = dict(
    Name=['Alice', 'Bob', 'Charlie', 'David', 'Eva'],
    Age=[23, 35, 45, 22, 28],
    Salary=[50000, 60000, 70000, 45000, 52000],
    Experience=[2, 8, 12, 1, 4],
)

In [34]:
# Build the working DataFrame: dict keys become columns, list items become rows.
df = pd.DataFrame.from_dict(data)

In [36]:
df

Unnamed: 0,Name,Age,Salary,Experience
0,Alice,23,50000,2
1,Bob,35,60000,8
2,Charlie,45,70000,12
3,David,22,45000,1
4,Eva,28,52000,4


In [54]:
# Derive 'Salary_per_Year_Exp': each row's Salary divided element-wise by its Experience
df['Salary_per_Year_Exp'] = df['Salary'].div(df['Experience'])

In [58]:
df

Unnamed: 0,Name,Age,Salary,Experience,Salary_per_Year_Exp
0,Alice,23,50000,2,25000.0
1,Bob,35,60000,8,7500.0
2,Charlie,45,70000,12,5833.333333
3,David,22,45000,1,45000.0
4,Eva,28,52000,4,13000.0


In [68]:
#2. Conditional Column 
#You can create a column based on a condition using np.where or apply().

In [70]:
# Adding a 'Senior' column based on age, using apply() with a named helper
def _senior_flag(age):
    """Return 'Yes' when age is 30 or more, otherwise 'No'."""
    if age >= 30:
        return 'Yes'
    return 'No'


df['Senior'] = df['Age'].apply(_senior_flag)

In [72]:
df

Unnamed: 0,Name,Age,Salary,Experience,Salary_per_Year_Exp,Senior
0,Alice,23,50000,2,25000.0,No
1,Bob,35,60000,8,7500.0,Yes
2,Charlie,45,70000,12,5833.333333,Yes
3,David,22,45000,1,45000.0,No
4,Eva,28,52000,4,13000.0,No


In [74]:
# Adding the same 'Senior' column based on age, this time with np.where:
# the comparison runs on the whole 'Age' column at once instead of row by row.
import numpy as np

df['Senior'] = np.where(df['Age'] >= 30, 'Yes', 'No')

In [76]:
df

Unnamed: 0,Name,Age,Salary,Experience,Salary_per_Year_Exp,Senior
0,Alice,23,50000,2,25000.0,No
1,Bob,35,60000,8,7500.0,Yes
2,Charlie,45,70000,12,5833.333333,Yes
3,David,22,45000,1,45000.0,No
4,Eva,28,52000,4,13000.0,No


In [None]:
#3. Creating Columns Using Aggregations

In [78]:
# Creating a column to show average salary by age:
# group the Salary values by Age, then broadcast each group's mean back onto its rows.
df['Average_Salary_by_Age'] = df['Salary'].groupby(df['Age']).transform('mean')

In [81]:
df

Unnamed: 0,Name,Age,Salary,Experience,Salary_per_Year_Exp,Senior,Average_Salary_by_Age
0,Alice,23,50000,2,25000.0,No,50000.0
1,Bob,35,60000,8,7500.0,Yes,60000.0
2,Charlie,45,70000,12,5833.333333,Yes,70000.0
3,David,22,45000,1,45000.0,No,45000.0
4,Eva,28,52000,4,13000.0,No,52000.0


In [83]:
#4. Creating Columns Using Mapping

In [87]:
# Mapping 'Experience' to a category (Junior, Mid, Senior).
# pd.cut bins the whole column at once using half-open intervals
# [0, 5) -> Junior, [5, 10) -> Mid, [10, 20) -> Senior, so fractional values
# such as 2.5 years are categorised too (a `x in range(...)` test only
# matches whole numbers).
experience_bins = [0, 5, 10, 20]
experience_labels = ['Junior', 'Mid', 'Senior']
df['Experience_Level'] = (
    pd.cut(
        df['Experience'],
        bins=experience_bins,
        labels=experience_labels,
        right=False,  # include the left edge, exclude the right edge
    )
    # Values outside every bin (or missing) come back as NaN; label them 'Unknown'.
    .cat.add_categories('Unknown')
    .fillna('Unknown')
    .astype(str)
)


In [89]:
df

Unnamed: 0,Name,Age,Salary,Experience,Salary_per_Year_Exp,Senior,Average_Salary_by_Age,Experience_Level
0,Alice,23,50000,2,25000.0,No,50000.0,Junior
1,Bob,35,60000,8,7500.0,Yes,60000.0,Mid
2,Charlie,45,70000,12,5833.333333,Yes,70000.0,Senior
3,David,22,45000,1,45000.0,No,45000.0,Junior
4,Eva,28,52000,4,13000.0,No,52000.0,Junior


In [91]:
#5. Combining Multiple Columns

In [93]:
# Creating a new column combining 'Name' and 'Experience',
# formatted as "<Name> (<Experience> years)".
df['Name_Experience'] = df['Name'].str.cat(df['Experience'].astype(str), sep=' (') + ' years)'



In [95]:
df

Unnamed: 0,Name,Age,Salary,Experience,Salary_per_Year_Exp,Senior,Average_Salary_by_Age,Experience_Level,Name_Experience
0,Alice,23,50000,2,25000.0,No,50000.0,Junior,Alice (2 years)
1,Bob,35,60000,8,7500.0,Yes,60000.0,Mid,Bob (8 years)
2,Charlie,45,70000,12,5833.333333,Yes,70000.0,Senior,Charlie (12 years)
3,David,22,45000,1,45000.0,No,45000.0,Junior,David (1 years)
4,Eva,28,52000,4,13000.0,No,52000.0,Junior,Eva (4 years)


In [None]:
#Key Insights for Data Analysts:
#Efficiency: Using vectorized operations (e.g., arithmetic or conditional operations) is much faster than row-wise operations.
#Transformation: Transforming data into meaningful categories or new metrics can help extract better insights.
#GroupBy: Aggregations using groupby and transform are powerful for deriving new features from existing data.
#Mapping: Use dictionaries or functions to map data to new categories or classifications.