In [None]:
import numpy as np
import pandas as pd

In [None]:
Create dataframe

In [4]:
import pandas as pd  # Import pandas

# Define data
data = {
    'Name': ['Alice', 'Bob', 'Charlie', 'David', 'Eve'],
    'Age': [24, 27, 22, 32, 29],
    'City': ['New York', 'Los Angeles', 'Chicago', 'Houston', 'Phoenix'],
    'Experience': [2, 5, 1, 10, 7]
}

# Create DataFrame
df = pd.DataFrame(data)

# Displaying data types of columns
print("Data types of columns:\n", df.dtypes)


Data types of columns:
 Name          object
Age            int64
City          object
Experience     int64
dtype: object


In [None]:
Converting data types

In [5]:
df['Age'] = df['Age'].astype(float)
print("\nDataFrame after converting Age to float:\n", df.dtypes)


DataFrame after converting Age to float:
 Name           object
Age           float64
City           object
Experience      int64
dtype: object


In [None]:
Setting a column as the index

In [6]:
df.set_index('Name', inplace=True)
print("\nDataFrame after setting Name as index:\n", df)


DataFrame after setting Name as index:
           Age         City  Experience
Name                                  
Alice    24.0     New York           2
Bob      27.0  Los Angeles           5
Charlie  22.0      Chicago           1
David    32.0      Houston          10
Eve      29.0      Phoenix           7


In [None]:
Resetting the index

In [7]:
df.reset_index(inplace=True)
print("\nDataFrame after resetting the index:\n", df)


DataFrame after resetting the index:
       Name   Age         City  Experience
0    Alice  24.0     New York           2
1      Bob  27.0  Los Angeles           5
2  Charlie  22.0      Chicago           1
3    David  32.0      Houston          10
4      Eve  29.0      Phoenix           7


In [None]:
Using the apply function to apply a function to each column

In [None]:
df['Salary in K'] = df['Salary'].apply(lambda x: x / 1000)
print("\nDataFrame after applying a function to the Salary column:\n", df)

In [None]:
Using the applymap function to apply a function element-wise

In [None]:
df[['Age', 'Salary']] = df[['Age', 'Salary']].applymap(lambda x: x + 1)
print("\nDataFrame after applying a function element-wise to Age and Salary:\n", df)

In [None]:
Using the map function to map values in a Series

In [11]:
city_map = {
    'New York': 'NY',
    'Los Angeles': 'LA',
    'Chicago': 'CHI',
    'Houston': 'HOU',
    'Phoenix': 'PHX'
}

df['City Abbreviation'] = df['City'].map(city_map)
print("\nDataFrame after mapping City to abbreviations:\n", df)


DataFrame after mapping City to abbreviations:
       Name   Age         City  Experience City Abbreviation
0    Alice  24.0     New York           2                NY
1      Bob  27.0  Los Angeles           5                LA
2  Charlie  22.0      Chicago           1               CHI
3    David  32.0      Houston          10               HOU
4      Eve  29.0      Phoenix           7               PHX


In [None]:
Using the merge function to merge DataFrames

In [12]:
additional_data = {
    'Name': ['Alice', 'Bob', 'Charlie', 'David', 'Eve'],
    'Department': ['HR', 'Engineering', 'Marketing', 'Sales', 'Finance']
}
df_additional = pd.DataFrame(additional_data)
merged_df = pd.merge(df, df_additional, on='Name')
print("\nMerged DataFrame:\n", merged_df)


Merged DataFrame:
       Name   Age         City  Experience City Abbreviation   Department
0    Alice  24.0     New York           2                NY           HR
1      Bob  27.0  Los Angeles           5                LA  Engineering
2  Charlie  22.0      Chicago           1               CHI    Marketing
3    David  32.0      Houston          10               HOU        Sales
4      Eve  29.0      Phoenix           7               PHX      Finance


In [None]:
Using the pivot_table function for data summarization

In [None]:
pivot = df.pivot_table(values='Salary', index='City', columns='Experience', aggfunc=np.mean)
print("\nPivot table of Salary by City and Experience:\n", pivot)

In [None]:
Using the melt function to unpivot a DataFrame from wide to long format

In [None]:
melted = df.melt(id_vars=['Name', 'City'], value_vars=['Age', 'Salary'])
print("\nMelted DataFrame:\n", melted)

In [None]:
Using the pivot function to pivot a DataFrame from long to wide format

In [None]:
pivoted = melted.pivot(index='Name', columns='variable', values='value')
print("\nPivoted DataFrame:\n", pivoted)

In [None]:
Using the cut function to segment and sort data into bins

In [16]:
df['Age Group'] = pd.cut(df['Age'], bins=[20, 25, 30, 35], labels=['20-25', '25-30', '30-35'])
print("\nDataFrame after segmenting Age into bins:\n", df)


DataFrame after segmenting Age into bins:
       Name   Age         City  Experience City Abbreviation Age Group
0    Alice  24.0     New York           2                NY     20-25
1      Bob  27.0  Los Angeles           5                LA     25-30
2  Charlie  22.0      Chicago           1               CHI     20-25
3    David  32.0      Houston          10               HOU     30-35
4      Eve  29.0      Phoenix           7               PHX     25-30


In [None]:
Using the qcut function to segment and sort data into quantiles

In [None]:
df['Salary Quantile'] = pd.qcut(df['Salary'], q=4, labels=['Q1', 'Q2', 'Q3', 'Q4'])
print("\nDataFrame after segmenting Salary into quantiles:\n", df)

In [None]:
Using the sample function to randomly sample rows

In [18]:
sampled_df = df.sample(n=3)
print("\nRandomly sampled DataFrame:\n", sampled_df)


Randomly sampled DataFrame:
       Name   Age      City  Experience City Abbreviation Age Group
2  Charlie  22.0   Chicago           1               CHI     20-25
0    Alice  24.0  New York           2                NY     20-25
4      Eve  29.0   Phoenix           7               PHX     25-30


In [None]:
Using the isin function to filter DataFrame based on a list of values

In [19]:
filtered_df = df[df['City'].isin(['New York', 'Chicago'])]
print("\nFiltered DataFrame with cities New York and Chicago:\n", filtered_df)


Filtered DataFrame with cities New York and Chicago:
       Name   Age      City  Experience City Abbreviation Age Group
0    Alice  24.0  New York           2                NY     20-25
2  Charlie  22.0   Chicago           1               CHI     20-25


In [None]:
Using the rank function to compute numerical data ranks

In [None]:
df['Salary Rank'] = df['Salary'].rank()
print("\nDataFrame after computing salary ranks:\n", df)

In [None]:
Saving DataFrame to an Excel file

In [21]:
df.to_excel('employee_data.xlsx', index=False)
print("\nDataFrame saved to 'employee_data.xlsx'")


DataFrame saved to 'employee_data.xlsx'


In [None]:
Reading DataFrame from an Excel file

In [22]:
df_from_excel = pd.read_excel('employee_data.xlsx')
print("\nDataFrame read from 'employee_data.xlsx':\n", df_from_excel)


DataFrame read from 'employee_data.xlsx':
       Name  Age         City  Experience City Abbreviation Age Group
0    Alice   24     New York           2                NY     20-25
1      Bob   27  Los Angeles           5                LA     25-30
2  Charlie   22      Chicago           1               CHI     20-25
3    David   32      Houston          10               HOU     30-35
4      Eve   29      Phoenix           7               PHX     25-30
