In [1]:
# Importing pandas library
import pandas as pd


In [2]:
# Creating a DataFrame
# Each key of the mapping becomes a column; each list supplies that
# column's values in row order
data = dict(
    Name=['John', 'Alice', 'Bob'],
    Age=[23, 30, 25],
    City=['New York', 'Los Angeles', 'Chicago'],
)
df = pd.DataFrame.from_dict(data)
print("DataFrame from dictionary:\n", df)


DataFrame from dictionary:
     Name  Age         City
0   John   23     New York
1  Alice   30  Los Angeles
2    Bob   25      Chicago


In [3]:
# Reading Data from a CSV File
# Load vgsales1.csv (resolved relative to the working directory) into its
# own DataFrame, then print it
csv_path = 'vgsales1.csv'
df_csv = pd.read_csv(csv_path)
print("DataFrame from CSV file:\n", df_csv)


DataFrame from CSV file:
         Rank                                               Name Platform  \
0        259                                          Asteroids     2600   
1        545                                    Missile Command     2600   
2       1768                                            Kaboom!     2600   
3       1971                                           Defender     2600   
4       2671                                             Boxing     2600   
...      ...                                                ...      ...   
16319  16565                                       Mighty No. 9     XOne   
16320  16572                                 Resident Evil 4 HD     XOne   
16321  16573                      Farming 2017 - The Simulation      PS4   
16322  16579                                  Rugby Challenge 3     XOne   
16323  16592  Chou Ezaru wa Akai Hana: Koi wa Tsuki ni Shiru...      PSV   

       Year       Genre             Publisher  NA_Sales  EU_S

In [4]:
# Writing Data to a CSV File
# Save df as output.csv; index=False leaves the row labels out of the file
df.to_csv(path_or_buf='output.csv', index=False)
print("DataFrame written to CSV file")


DataFrame written to CSV file


In [5]:
# Viewing Data
# Peek at both ends of the DataFrame. head() and tail() each return at most
# five rows by default, so a three-row frame is shown in full both times.
first_rows = df.head()
last_rows = df.tail()
print("First few rows of the DataFrame:\n", first_rows)
print("Last few rows of the DataFrame:\n", last_rows)


First few rows of the DataFrame:
     Name  Age         City
0   John   23     New York
1  Alice   30  Los Angeles
2    Bob   25      Chicago
Last few rows of the DataFrame:
     Name  Age         City
0   John   23     New York
1  Alice   30  Los Angeles
2    Bob   25      Chicago


In [6]:
# Selecting Columns
# One label picks out a single column as a Series
age_column = df.loc[:, 'Age']
print("Age column:\n", age_column)

# A list of labels picks out several columns as a DataFrame
wanted_columns = ['Name', 'City']
name_city_columns = df[wanted_columns]
print("Name and City columns:\n", name_city_columns)


Age column:
 0    23
1    30
2    25
Name: Age, dtype: int64
Name and City columns:
     Name         City
0   John     New York
1  Alice  Los Angeles
2    Bob      Chicago


In [7]:
# Descriptive Statistics
# Summarise the numeric columns: count, mean, std, min, quartiles and max
summary_stats = df.describe()
print("Descriptive statistics:\n", summary_stats)


Descriptive statistics:
              Age
count   3.000000
mean   26.000000
std     3.605551
min    23.000000
25%    24.000000
50%    25.000000
75%    27.500000
max    30.000000


In [8]:
# Filtering Rows
# Build a boolean mask (True where Age exceeds 25) and keep only those rows
is_over_25 = df['Age'].gt(25)
age_filter = df.loc[is_over_25]
print("Rows where age is greater than 25:\n", age_filter)


Rows where age is greater than 25:
     Name  Age         City
1  Alice   30  Los Angeles


In [9]:
# Adding a New Column
# Supply one salary per existing row, in row order. Note that this modifies
# df itself rather than producing a new frame.
salaries = [50000, 60000, 55000]
df['Salary'] = salaries
print("DataFrame with new Salary column:\n", df)


DataFrame with new Salary column:
     Name  Age         City  Salary
0   John   23     New York   50000
1  Alice   30  Los Angeles   60000
2    Bob   25      Chicago   55000


In [10]:
# Dropping Columns
# drop() returns a new frame without the Salary column; df keeps it
df_dropped = df.drop('Salary', axis='columns')
print("DataFrame after dropping Salary column:\n", df_dropped)


DataFrame after dropping Salary column:
     Name  Age         City
0   John   23     New York
1  Alice   30  Los Angeles
2    Bob   25      Chicago


In [11]:
# Renaming Columns
# Map old column labels to new ones; labels not listed stay as they are,
# and rename() returns a new frame rather than altering df
new_labels = {'Name': 'Full Name', 'City': 'Location'}
df_renamed = df.rename(new_labels, axis='columns')
print("DataFrame with renamed columns:\n", df_renamed)


DataFrame with renamed columns:
   Full Name  Age     Location  Salary
0      John   23     New York   50000
1     Alice   30  Los Angeles   60000
2       Bob   25      Chicago   55000


In [12]:
# Handling Missing Values
# Work on a separate frame so df itself is left untouched. Age is cast to
# float64 up front: an int64 column cannot store NaN, and recent pandas
# releases deprecate assignments that silently upcast a column's dtype.
# astype() returns a new frame, so it also takes the place of copy().
df_missing = df.astype({'Age': 'float64'})
df_missing.loc[1, 'Age'] = float('nan')  # simulate a missing age for row 1

# Fill missing values with a specified value
df_filled = df_missing.fillna(0)
print("DataFrame with missing values filled:\n", df_filled)

# Drop rows with missing values
df_dropped_na = df_missing.dropna()
print("DataFrame with rows containing missing values dropped:\n", df_dropped_na)


DataFrame with missing values filled:
     Name   Age         City  Salary
0   John  23.0     New York   50000
1  Alice   0.0  Los Angeles   60000
2    Bob  25.0      Chicago   55000
DataFrame with rows containing missing values dropped:
    Name   Age      City  Salary
0  John  23.0  New York   50000
2   Bob  25.0   Chicago   55000


In [13]:
# Grouping Data
# Group rows by City and average the numeric columns for each group.
# The columns are selected explicitly because df also holds the string
# column Name, which cannot be averaged: older pandas dropped it with a
# FutureWarning, and pandas 2.x raises a TypeError instead.
numeric_columns = ['Age', 'Salary']
grouped = df.groupby('City')[numeric_columns].mean()
print("Grouped data with mean age:\n", grouped)


Grouped data with mean age:
               Age   Salary
City                      
Chicago      25.0  55000.0
Los Angeles  30.0  60000.0
New York     23.0  50000.0


  grouped = df.groupby('City').mean()


In [14]:
# Merging DataFrames
# Build a second frame that shares the 'Name' key with df
data2 = dict(
    Name=['John', 'Alice', 'Bob'],
    Salary=[50000, 60000, 55000],
)
df2 = pd.DataFrame(data2)
# Join on Name. df already carries a Salary column by this point, so pandas
# keeps both copies and distinguishes them with its default _x / _y suffixes.
merged_df = df.merge(df2, on='Name')
print("Merged DataFrame:\n", merged_df)


Merged DataFrame:
     Name  Age         City  Salary_x  Salary_y
0   John   23     New York     50000     50000
1  Alice   30  Los Angeles     60000     60000
2    Bob   25      Chicago     55000     55000


In [15]:
# Pivot Tables
# One row per City, holding the mean of Age for that city
pivot_table = pd.pivot_table(df, values='Age', index='City', aggfunc='mean')
print("Pivot table:\n", pivot_table)


Pivot table:
              Age
City            
Chicago       25
Los Angeles   30
New York      23


In [16]:
# Sorting Data
# Order the rows by Age, youngest first; the result is a new frame that
# keeps each row's original index label
sorted_df = df.sort_values('Age', ascending=True)
print("DataFrame sorted by age:\n", sorted_df)


DataFrame sorted by age:
     Name  Age         City  Salary
0   John   23     New York   50000
2    Bob   25      Chicago   55000
1  Alice   30  Los Angeles   60000


In [17]:
# Applying Functions
# Run a Python function over every value of a column and store the results
# in a new column (this modifies df itself)
def _years_to_months(years):
    """Convert an age given in years to months."""
    return years * 12


df['Age in Months'] = df['Age'].apply(_years_to_months)
print("DataFrame with Age in Months column:\n", df)


DataFrame with Age in Months column:
     Name  Age         City  Salary  Age in Months
0   John   23     New York   50000            276
1  Alice   30  Los Angeles   60000            360
2    Bob   25      Chicago   55000            300
