In [1]:
import pandas as pd
import numpy as np

In [2]:
# Creating an empty Series
ser = pd.Series()
print(ser)

Series([], dtype: object)


In [3]:
# Creating a Series from a list of integers
ser = pd.Series([10, 20, 30, 40, 50])
print(ser)

0    10
1    20
2    30
3    40
4    50
dtype: int64


In [4]:
# Creating a Series from a list with different data types
ser = pd.Series([25, 14.89, 'Alice'])
print(ser)

0       25
1    14.89
2    Alice
dtype: object


In [5]:
# Creating a DataFrame from a dictionary
data_dict = {'Name': ['John', 'Anna', 'Peter'], 'Age': [28, 22, 29], 'Gender': ['M', 'F', 'M']}
df = pd.DataFrame(data_dict)
print(df)

    Name  Age Gender
0   John   28      M
1   Anna   22      F
2  Peter   29      M


In [6]:
# Adding a new column to the DataFrame
df['Department'] = ["HR", "IT", "Finance"]
print(df)

    Name  Age Gender Department
0   John   28      M         HR
1   Anna   22      F         IT
2  Peter   29      M    Finance


In [7]:
# Creating a dictionary with NaN values
data_dict = {'Name': ['John', 'Anna', np.nan], 'Age': [28, 22, 29], 'Gender': ['M', np.nan, 'F']}
print(data_dict)

{'Name': ['John', 'Anna', nan], 'Age': [28, 22, 29], 'Gender': ['M', nan, 'F']}


In [8]:
# Creating a DataFrame from the dictionary
df = pd.DataFrame(data_dict)
print(df)

   Name  Age Gender
0  John   28      M
1  Anna   22    NaN
2   NaN   29      F


In [9]:
# Dropping columns with NaN values
print("Dropping columns with NaN values")
print(df.dropna(axis=1))

Dropping columns with NaN values
   Age
0   28
1   22
2   29


In [10]:
# Displaying the DataFrame
print(df)

   Name  Age Gender
0  John   28      M
1  Anna   22    NaN
2   NaN   29      F


In [11]:
# Dropping rows with NaN values
print("Dropping rows with NaN values")
print(df.dropna(axis=0))

# Displaying the DataFrame
print(df)

Dropping rows with NaN values
   Name  Age Gender
0  John   28      M
   Name  Age Gender
0  John   28      M
1  Anna   22    NaN
2   NaN   29      F


In [12]:
# Filling NaN values with a specific value
print("Filling NaN values with 'Unknown'")
print(df.fillna('Unknown'))

Filling NaN values with 'Unknown'
      Name  Age   Gender
0     John   28        M
1     Anna   22  Unknown
2  Unknown   29        F


In [13]:
# Creating another dictionary with NaN values
data_dict = {'Product': ['Laptop', 'Mouse', np.nan], 'Price': [800, 25, 45], 'Stock': [20, np.nan, 100]}
print(data_dict)

# Creating a DataFrame from the dictionary
df = pd.DataFrame(data_dict)
print(df)

{'Product': ['Laptop', 'Mouse', nan], 'Price': [800, 25, 45], 'Stock': [20, nan, 100]}
  Product  Price  Stock
0  Laptop    800   20.0
1   Mouse     25    NaN
2     NaN     45  100.0


In [14]:
# Filling NaN values with a specific value
print("Filling NaN values with 0")
print(df.fillna(0))

# Displaying the DataFrame
print(df)

Filling NaN values with 0
  Product  Price  Stock
0  Laptop    800   20.0
1   Mouse     25    0.0
2       0     45  100.0
  Product  Price  Stock
0  Laptop    800   20.0
1   Mouse     25    NaN
2     NaN     45  100.0


In [15]:
# Filling NaN values with the mean of the column
print("Filling NaN values with the mean of the column")
df.fillna(value=df['Stock'].mean(), inplace=True)
print(df)

Filling NaN values with the mean of the column
  Product  Price  Stock
0  Laptop    800   20.0
1   Mouse     25   60.0
2    60.0     45  100.0


In [16]:
# Grouping by a column and calculating the mean
data = {'Company': ['Apple', 'Apple', 'Google', 'Google', 'Amazon', 'Amazon'],
        'Person': ['John', 'Mike', 'Sara', 'Anna', 'Bob', 'Linda'],
        'Sales': [200, 150, 300, 350, 500, 450]}
df = pd.DataFrame(data)
print(df)

by_company = df.groupby('Company')
print(by_company.mean(numeric_only=True))
print(by_company['Sales'].mean())

print(by_company['Sales'].sum())

  Company Person  Sales
0   Apple   John    200
1   Apple   Mike    150
2  Google   Sara    300
3  Google   Anna    350
4  Amazon    Bob    500
5  Amazon  Linda    450
         Sales
Company       
Amazon   475.0
Apple    175.0
Google   325.0
Company
Amazon    475.0
Apple     175.0
Google    325.0
Name: Sales, dtype: float64
Company
Amazon    950
Apple     350
Google    650
Name: Sales, dtype: int64


In [17]:
# Concatenating DataFrames row-wise
class_a = pd.DataFrame({'Math': [78, 85, 92, 88], 'Science': [72, 80, 95, 89]}, 
                       index=['Tom', 'Jerry', 'Mickey', 'Donald'])
class_b = pd.DataFrame({'Math': [80, 70, 88, 92], 'Science': [75, 85, 89, 94]}, 
                       index=['Bart', 'Lisa', 'Homer', 'Marge'])
class_c = pd.DataFrame({'Math': [68, 75, 80, 77], 'Science': [70, 78, 82, 85]}, 
                       index=['Spongebob', 'Patrick', 'Squidward', 'Sandy'])

print("Row-wise concatenation")
student_marks = pd.concat([class_a, class_b, class_c], axis=0)
print(student_marks)

Row-wise concatenation
           Math  Science
Tom          78       72
Jerry        85       80
Mickey       92       95
Donald       88       89
Bart         80       75
Lisa         70       85
Homer        88       89
Marge        92       94
Spongebob    68       70
Patrick      75       78
Squidward    80       82
Sandy        77       85


In [18]:
# Concatenating DataFrames column-wise
df = pd.concat([class_a, class_b, class_c], axis=1)
print("Column-wise concatenation")
print(df)

Column-wise concatenation
           Math  Science  Math  Science  Math  Science
Tom        78.0     72.0   NaN      NaN   NaN      NaN
Jerry      85.0     80.0   NaN      NaN   NaN      NaN
Mickey     92.0     95.0   NaN      NaN   NaN      NaN
Donald     88.0     89.0   NaN      NaN   NaN      NaN
Bart        NaN      NaN  80.0     75.0   NaN      NaN
Lisa        NaN      NaN  70.0     85.0   NaN      NaN
Homer       NaN      NaN  88.0     89.0   NaN      NaN
Marge       NaN      NaN  92.0     94.0   NaN      NaN
Spongebob   NaN      NaN   NaN      NaN  68.0     70.0
Patrick     NaN      NaN   NaN      NaN  75.0     78.0
Squidward   NaN      NaN   NaN      NaN  80.0     82.0
Sandy       NaN      NaN   NaN      NaN  77.0     85.0


In [19]:
# Filling NaN values with 0
df.fillna(0, inplace=True)
print(df)

           Math  Science  Math  Science  Math  Science
Tom        78.0     72.0   0.0      0.0   0.0      0.0
Jerry      85.0     80.0   0.0      0.0   0.0      0.0
Mickey     92.0     95.0   0.0      0.0   0.0      0.0
Donald     88.0     89.0   0.0      0.0   0.0      0.0
Bart        0.0      0.0  80.0     75.0   0.0      0.0
Lisa        0.0      0.0  70.0     85.0   0.0      0.0
Homer       0.0      0.0  88.0     89.0   0.0      0.0
Marge       0.0      0.0  92.0     94.0   0.0      0.0
Spongebob   0.0      0.0   0.0      0.0  68.0     70.0
Patrick     0.0      0.0   0.0      0.0  75.0     78.0
Squidward   0.0      0.0   0.0      0.0  80.0     82.0
Sandy       0.0      0.0   0.0      0.0  77.0     85.0


In [20]:
# Merging DataFrames on a column
player_id_name = pd.DataFrame({'ID': [7, 14, 21, 28], 'Name': ['Leo', 'Sam', 'Alex', 'Nina']})
print("Players")
print(player_id_name)

player_details = pd.DataFrame({'ID': [7, 14, 21, 28], 'Age': [23, 27, 21, 25], 'Country': ['Spain', 'Brazil', 'USA', 'Italy']})
print("Details")
print(player_details)

print("Merging based on IDs")
merged_df = pd.merge(player_id_name, player_details, on="ID")
print(merged_df)

Players
   ID  Name
0   7   Leo
1  14   Sam
2  21  Alex
3  28  Nina
Details
   ID  Age Country
0   7   23   Spain
1  14   27  Brazil
2  21   21     USA
3  28   25   Italy
Merging based on IDs
   ID  Name  Age Country
0   7   Leo   23   Spain
1  14   Sam   27  Brazil
2  21  Alex   21     USA
3  28  Nina   25   Italy


In [21]:
# Joining DataFrames on their index
left = pd.DataFrame({'X': ['X0', 'X1', 'X2'], 'Y': ['Y0', 'Y1', 'Y2']}, index=['A', 'B', 'C'])
right = pd.DataFrame({'Z': ['Z0', 'Z2', 'Z3'], 'W': ['W0', 'W2', 'W3']}, index=['A', 'C', 'D'])
print(left)
print(right)

print("Inner join")
print(left.join(right))

print("Outer join")
print(left.join(right, how='outer'))

    X   Y
A  X0  Y0
B  X1  Y1
C  X2  Y2
    Z   W
A  Z0  W0
C  Z2  W2
D  Z3  W3
Inner join
    X   Y    Z    W
A  X0  Y0   Z0   W0
B  X1  Y1  NaN  NaN
C  X2  Y2   Z2   W2
Outer join
     X    Y    Z    W
A   X0   Y0   Z0   W0
B   X1   Y1  NaN  NaN
C   X2   Y2   Z2   W2
D  NaN  NaN   Z3   W3
