In [1]:
# pivot() and pivot_table()
import pandas as pd

# Sample DataFrame - Indian Employee Data
data = {
    'Name': ['Rajesh', 'Preeti', 'Amit', 'Sneha', 'Manoj'],
    'Salary': [50000, 60000, 55000, 52000, 48000],
    'Incentive': [2000, 2500, 1800, 2200, 2100],
    'Job Title': ['Software Engineer', 'Data Analyst', 'Project Manager', 'HR Manager', 'Business Analyst']
}

df = pd.DataFrame(data)

print("Sample DataFrame - Indian Employee Data:")
print(df)

Sample DataFrame - Indian Employee Data:
     Name  Salary  Incentive          Job Title
0  Rajesh   50000       2000  Software Engineer
1  Preeti   60000       2500       Data Analyst
2    Amit   55000       1800    Project Manager
3   Sneha   52000       2200         HR Manager
4   Manoj   48000       2100   Business Analyst


In [2]:
# Using pivot() to reshape the data
pivot_result = df.pivot(index='Name', columns='Job Title', values=['Salary', 'Incentive'])
 
print("\nUsing pivot() to reshape the data:")
print(pivot_result)
 
# Using pivot_table() to summarize the data
pivot_table_result = df.pivot_table(index='Name', columns='Job Title', values=['Salary', 'Incentive'], aggfunc='sum')
 
print("\nUsing pivot_table() to summarize the data:")
print(pivot_table_result)


Using pivot() to reshape the data:
                    Salary                                          \
Job Title Business Analyst Data Analyst HR Manager Project Manager   
Name                                                                 
Amit                   NaN          NaN        NaN         55000.0   
Manoj              48000.0          NaN        NaN             NaN   
Preeti                 NaN      60000.0        NaN             NaN   
Rajesh                 NaN          NaN        NaN             NaN   
Sneha                  NaN          NaN    52000.0             NaN   

                                   Incentive                          \
Job Title Software Engineer Business Analyst Data Analyst HR Manager   
Name                                                                   
Amit                    NaN              NaN          NaN        NaN   
Manoj                   NaN           2100.0          NaN        NaN   
Preeti                  NaN              Na

In [3]:
# stack() and unstack()
# Using pivot_table() to summarize the data
pivot_table_result = df.pivot_table(index='Name', columns='Job Title', values=['Salary', 'Incentive'], aggfunc='sum')
 
# Using stack() to pivot the data
stacked_data = pivot_table_result.stack()
 
print("\nUsing stack() to pivot the data:")
print(stacked_data)
 
# Using unstack() to revert the pivot
unstacked_data = stacked_data.unstack()
 
print("\nUsing unstack() to revert the pivot:")
print(unstacked_data)


Using stack() to pivot the data:
                          Incentive   Salary
Name   Job Title                            
Amit   Project Manager       1800.0  55000.0
Manoj  Business Analyst      2100.0  48000.0
Preeti Data Analyst          2500.0  60000.0
Rajesh Software Engineer     2000.0  50000.0
Sneha  HR Manager            2200.0  52000.0

Using unstack() to revert the pivot:
                 Incentive                                          \
Job Title Business Analyst Data Analyst HR Manager Project Manager   
Name                                                                 
Amit                   NaN          NaN        NaN          1800.0   
Manoj               2100.0          NaN        NaN             NaN   
Preeti                 NaN       2500.0        NaN             NaN   
Rajesh                 NaN          NaN        NaN             NaN   
Sneha                  NaN          NaN     2200.0             NaN   

                                      Salary        

  stacked_data = pivot_table_result.stack()


In [4]:
# melt() and wide_to_long()
# Using melt() to unpivot the data
melted_data = pd.melt(df, id_vars=['Name'], value_vars=['Salary', 'Incentive'], var_name='Attribute', value_name='Amount')
 
print("\nUsing melt() to unpivot the data:")
print(melted_data)
 
# Creating a wide DataFrame for demonstration
wide_df = pd.DataFrame({
    'Name': ['Rajesh', 'Preeti', 'Amit', 'Sneha', 'Manoj'],
    'Salary_2022': [50000, 60000, 55000, 52000, 48000],
    'Salary_2023': [52000, 61000, 56000, 53000, 49000],
    'Incentive_2022': [2000, 2500, 1800, 2200, 2100],
    'Incentive_2023': [2100, 2600, 1900, 2300, 2200]
})
 
print("\nSample DataFrame - Wide Format:")
print(wide_df)
 
# Using wide_to_long() to unpivot the wide DataFrame
long_df = pd.wide_to_long(wide_df, stubnames=['Salary', 'Incentive'], i='Name', j='Year', sep='_')
 
print("\nUsing wide_to_long() to unpivot the wide DataFrame:")
print(long_df)


Using melt() to unpivot the data:
     Name  Attribute  Amount
0  Rajesh     Salary   50000
1  Preeti     Salary   60000
2    Amit     Salary   55000
3   Sneha     Salary   52000
4   Manoj     Salary   48000
5  Rajesh  Incentive    2000
6  Preeti  Incentive    2500
7    Amit  Incentive    1800
8   Sneha  Incentive    2200
9   Manoj  Incentive    2100

Sample DataFrame - Wide Format:
     Name  Salary_2022  Salary_2023  Incentive_2022  Incentive_2023
0  Rajesh        50000        52000            2000            2100
1  Preeti        60000        61000            2500            2600
2    Amit        55000        56000            1800            1900
3   Sneha        52000        53000            2200            2300
4   Manoj        48000        49000            2100            2200

Using wide_to_long() to unpivot the wide DataFrame:
             Salary  Incentive
Name   Year                   
Rajesh 2022   50000       2000
Preeti 2022   60000       2500
Amit   2022   55000       18

In [6]:
# get_dummies() and from_dummies()

# Sample DataFrame - Indian Employee Data
data = {
    'Name': ['Rajesh', 'Preeti', 'Amit', 'Sneha', 'Manoj'],
    'Department': ['IT', 'HR', 'Finance', 'IT', 'Marketing']
}
 
df = pd.DataFrame(data)
 
print("\nSample DataFrame - Indian Employee Data:")
print(df)
 
# Using get_dummies() to convert categorical variable into dummy/indicator variables
dummy_data = pd.get_dummies(df['Department'])
 
print("\nUsing get_dummies() to convert categorical variable into dummy/indicator variables:")
print(dummy_data)
 
# Using from_dummies() to revert back to categorical variable
reverted_data = pd.get_dummies(df['Department']).idxmax(axis=1)
 
print("\nUsing from_dummies() to revert back to categorical variable:")
print(reverted_data)


Sample DataFrame - Indian Employee Data:
     Name Department
0  Rajesh         IT
1  Preeti         HR
2    Amit    Finance
3   Sneha         IT
4   Manoj  Marketing

Using get_dummies() to convert categorical variable into dummy/indicator variables:
   Finance     HR     IT  Marketing
0    False  False   True      False
1    False   True  False      False
2     True  False  False      False
3    False  False   True      False
4    False  False  False       True

Using from_dummies() to revert back to categorical variable:
0           IT
1           HR
2      Finance
3           IT
4    Marketing
dtype: object


In [8]:
# explode()
# Sample DataFrame - Indian Employee Data with multiple skills
data = {
    'Name': ['Rajesh', 'Preeti', 'Amit', 'Sneha', 'Manoj'],
    'Skills': [['Python', 'Java'], ['Excel', 'Communication'], ['Finance', 'Analysis'], ['Python', 'SQL'], ['Marketing', 'Sales']]
}
 
df = pd.DataFrame(data)
 
print("\nSample DataFrame - Indian Employee Data with multiple skills:")
print(df)
 
# Using explode() to convert list-like values to individual rows
exploded_data = df.explode('Skills')
 
print("\nUsing explode() to convert list-like values to individual rows:")
print(exploded_data)


Sample DataFrame - Indian Employee Data with multiple skills:
     Name                  Skills
0  Rajesh          [Python, Java]
1  Preeti  [Excel, Communication]
2    Amit     [Finance, Analysis]
3   Sneha           [Python, SQL]
4   Manoj      [Marketing, Sales]

Using explode() to convert list-like values to individual rows:
     Name         Skills
0  Rajesh         Python
0  Rajesh           Java
1  Preeti          Excel
1  Preeti  Communication
2    Amit        Finance
2    Amit       Analysis
3   Sneha         Python
3   Sneha            SQL
4   Manoj      Marketing
4   Manoj          Sales


In [10]:
# crosstab()
# Sample DataFrame - Indian Employee Data with departments
data = {
    'Name': ['Rajesh', 'Preeti', 'Amit', 'Sneha', 'Manoj'],
    'Department': ['IT', 'HR', 'Finance', 'IT', 'Marketing'],
    'Job Title': ['Software Engineer', 'HR Manager', 'Financial Analyst', 'Software Engineer', 'Marketing Specialist']
}
 
df = pd.DataFrame(data)
 
print("\nSample DataFrame - Indian Employee Data with departments:")
print(df)
 
# Using crosstab() to calculate cross-tabulation
cross_tab = pd.crosstab(df['Department'], df['Job Title'])
 
print("\nUsing crosstab() to calculate cross-tabulation:")
print(cross_tab)


Sample DataFrame - Indian Employee Data with departments:
     Name Department             Job Title
0  Rajesh         IT     Software Engineer
1  Preeti         HR            HR Manager
2    Amit    Finance     Financial Analyst
3   Sneha         IT     Software Engineer
4   Manoj  Marketing  Marketing Specialist

Using crosstab() to calculate cross-tabulation:
Job Title   Financial Analyst  HR Manager  Marketing Specialist  \
Department                                                        
Finance                     1           0                     0   
HR                          0           1                     0   
IT                          0           0                     0   
Marketing                   0           0                     1   

Job Title   Software Engineer  
Department                     
Finance                     0  
HR                          0  
IT                          2  
Marketing                   0  


In [None]:
# cut()
# Sample DataFrame - Indian Employee Data with salaries
data = {
    'Name': ['Rajesh', 'Preeti', 'Amit', 'Sneha', 'Manoj'],
    'Salary': [60000, 55000, 70000, 50000, 75000]
}
 
df = pd.DataFrame(data)
 
print("\nSample DataFrame - Indian Employee Data with salaries:")
print(df)
 
# Using cut() to transform continuous variable into categorical variable
bins = [40000, 60000, 80000]
labels = ['Low', 'Medium', 'High']
 
df['Salary_Category'] = pd.cut(df['Salary'], bins=bins, labels=labels)
 
print("\nUsing cut() to transform continuous variable into categorical variable:")
print(df)

In [None]:
# factorize()
# Sample DataFrame - Indian Employee Data with job titles
data = {
    'Name': ['Rajesh', 'Preeti', 'Amit', 'Sneha', 'Manoj'],
    'Job Title': ['Software Engineer', 'HR Manager', 'Financial Analyst', 'Software Engineer', 'Marketing Specialist']
}
 
df = pd.DataFrame(data)
 
print("\nSample DataFrame - Indian Employee Data with job titles:")
print(df)
 
# Using factorize() to encode categorical variable into integer labels
df['Job_Title_Label'], unique_job_titles = pd.factorize(df['Job Title'])
 
print("\nUsing factorize() to encode categorical variable into integer labels:")
print(df)