### Assignment 1: DataFrame Creation and Indexing 

In [2]:
# 1. Create a Pandas DataFrame with 4 columns and 6 rows filled with random integers.
import numpy as np
import pandas as pd

df = pd.DataFrame(np.random.randint(1,100, size=(6,4)), columns = ['A','B','C','D'])
print('Original DataFrame: ')
print(df)

## Set the index to be the first column.
df.set_index('A', inplace=True)
print('DataFrame with new index : ')
print(df)

Original DataFrame: 
    A   B   C   D
0  32  48  60  55
1  73  61  58  61
2  26  67  51  36
3  70  49  18  81
4  20  31   8  43
5  33  21   1  53
DataFrame with new index : 
     B   C   D
A             
32  48  60  55
73  61  58  61
26  67  51  36
70  49  18  81
20  31   8  43
33  21   1  53


In [3]:
# 2. Create a Pandas DataFrame with columns 'A', 'B', 'C' and index 'X', 'Y', 'Z'.
df = pd.DataFrame(np.random.randint(1,7, size=(3,3)), columns=['A','B','C'], index=['X','Y','Z'])
print('Original DataFrame:')
print(df)

## Fill the DataFrame with random integers and access the element at row 'Y' and column 'B'.
print(f"Element at Column 'B' and Row 'Y' : {df.at['Y', 'B']}")

Original DataFrame:
   A  B  C
X  2  4  3
Y  5  5  3
Z  3  3  6
Element at Column 'B' and Row 'Y' : 5


### Assignment 2: DataFrame Operations

In [6]:
# 1. Create a Pandas DataFrame with 3 columns and 5 rows filled with random integers.
df = pd.DataFrame(np.random.randint(1,100, size=(5,3)), columns=['A','B','C'])
print('Original DataFrame:')
print(df)

## Add a new column that is the product of the first two columns.
df['D'] = df['A']*df['B']
print("DataFrame with new column 'D':")
print(df)

Original DataFrame:
    A   B   C
0  80  26  81
1  74  44  14
2  26  65  61
3  81  82  70
4  37  72   3
DataFrame with new column 'D':
    A   B   C     D
0  80  26  81  2080
1  74  44  14  3256
2  26  65  61  1690
3  81  82  70  6642
4  37  72   3  2664


In [16]:
# 2. Create a Pandas DataFrame with 3 columns and 4 rows filled with random integers.
df = pd.DataFrame(np.random.randint(1,100, size=(4,3)), columns=['A','B','C'])
print('DataFrame:')
print(df)

## Compute the row-wise and column-wise sum
print(f'\nRow sum:\n{df.sum(axis=1)}\n\nColumn sum:\n{df.sum(axis=0)}')

DataFrame:
    A   B   C
0  50  67  18
1  38  63  95
2  69  69  67
3  99  62  39

Row sum:
0    135
1    196
2    205
3    200
dtype: int64

Column sum:
A    256
B    261
C    219
dtype: int64


### Assignment 3: Data Cleaning

In [3]:
# 1. Create a Pandas DataFrame with 3 columns and 5 rows filled with random integers.
df = pd.DataFrame(np.random.randint(1,100, size=(5,3)), columns=['A','B','C'])
print('Original DataFrame:')
print(df)

## Introduce some NaN values.
df.iloc[0, 1] = np.nan
df.iloc[2, 2] = np.nan
df.iloc[4, 0] = np.nan
print('\nDataFrame with NaN values:')
print(df)

## Fill the NaN values with the mean of the respective columns.
df.fillna(df.mean(), inplace=True)
print('\nDataFrame with NaN values filled:')
print(df)

Original DataFrame:
    A   B   C
0  87  94  92
1   3  14   8
2  74  73   1
3  38  95   8
4  22  12   6

DataFrame with NaN values:
      A     B     C
0  87.0   NaN  92.0
1   3.0  14.0   8.0
2  74.0  73.0   NaN
3  38.0  95.0   8.0
4   NaN  12.0   6.0

DataFrame with NaN values filled:
      A     B     C
0  87.0  48.5  92.0
1   3.0  14.0   8.0
2  74.0  73.0  28.5
3  38.0  95.0   8.0
4  50.5  12.0   6.0


In [27]:
# 2. Create a Pandas DataFrame with 4 columns and 6 rows filled with random integers.
df = pd.DataFrame(np.random.randint(1,100,size=(6,4)), columns=['A','B','C','D'])
print('Original DataFrame:')
print(df)

## Introduce some NaN values.
df.iloc[0,2] = np.nan
df.iloc[3,3] = np.nan
df.iloc[5,1] = np.nan
print('\nDataFrame with NaN values:')
print(df)

## Drop the rows with any NaN values.
print('\nDataFrame with dropped NaN values:')
df.dropna(inplace=True)
print(df)

Original DataFrame:
    A   B   C   D
0  27  13  62  39
1  93  57  85   4
2   1  82  30   2
3  89   3  37  71
4  41  16   4  76
5  98  24  86   9

DataFrame with NaN values:
    A     B     C     D
0  27  13.0   NaN  39.0
1  93  57.0  85.0   4.0
2   1  82.0  30.0   2.0
3  89   3.0  37.0   NaN
4  41  16.0   4.0  76.0
5  98   NaN  86.0   9.0

DataFrame with dropped NaN values:
    A     B     C     D
1  93  57.0  85.0   4.0
2   1  82.0  30.0   2.0
4  41  16.0   4.0  76.0


### Assignment 4: Data Aggregation

In [30]:
# 1. Create a Pandas DataFrame with 2 columns: 'Category' and 'Value'. 
## Fill the 'Category' column with random categories ('A', 'B', 'C') and the 'Value' column with random integers.
df = pd.DataFrame({'Category': np.random.choice(['A','B','C'],size=10), 'Value': np.random.randint(1, 100, size=10)})
print('DataFrame:')
print(df)

## Group the DataFrame by 'Category' and compute the sum and mean of 'Value' for each category.
grouped = df.groupby('Category')['Value'].agg(['sum', 'mean'])
print('\nGrouped DataFrame:')
print(grouped)

DataFrame:
  Category  Value
0        C      9
1        A     12
2        B     26
3        C      7
4        B     71
5        A     14
6        A     29
7        A     77
8        A     19
9        C     48

Grouped DataFrame:
          sum       mean
Category                
A         151  30.200000
B          97  48.500000
C          64  21.333333


In [37]:
# 2. Create a Pandas DataFrame with 3 columns: 'Product', 'Category', and 'Sales'.
## Fill the DataFrame with random data.
df = pd.DataFrame({'Product':np.random.choice(['Prod1','Prod2','Prod3'], size=10),
                   'Category':np.random.choice(['A','B','C'], size=10),
                   'Values':np.random.randint(1,100, size=10)})
print('DataFrame:')
print(df)

## Group the DataFrame by 'Category' and compute the total sales for each category.
grouped = df.groupby('Category')['Values'].agg(['sum', 'mean'])
print('\nGrouped DataFrame:')
print(grouped)

DataFrame:
  Product Category  Values
0   Prod1        A      58
1   Prod2        A      79
2   Prod3        C      98
3   Prod1        B      61
4   Prod2        B      42
5   Prod2        C      34
6   Prod3        C      84
7   Prod1        B      40
8   Prod2        B      59
9   Prod3        C      46

Grouped DataFrame:
          sum  mean
Category           
A         137  68.5
B         202  50.5
C         262  65.5


### Assignment 5: Merging DataFrames

In [40]:
# 1. Create two Pandas DataFrames with a common column.
df1 = pd.DataFrame({'Key': ['A','B','C','D'], 'Value1': np.random.randint(1,100,size=4)})
df2 = pd.DataFrame({'Key': ['A','B','C','E'], 'Value2': np.random.randint(1,100,size=4)})
print('DataFrame 1:')
print(df1)
print('\nDataFrame 2:')
print(df2)

## Merge the DataFrames using the common column.
merged = pd.merge(df1, df2, on='Key')
print('\nMerged DataFrame:')
print(merged)

DataFrame 1:
  Key  Value1
0   A      79
1   B      98
2   C      20
3   D      74

DataFrame 2:
  Key  Value2
0   A      99
1   B      85
2   C      78
3   E      53

Merged DataFrame:
  Key  Value1  Value2
0   A      79      99
1   B      98      85
2   C      20      78


In [47]:
# 2. Create two Pandas DataFrames with different columns.
df1 = pd.DataFrame({'A': np.random.randint(1,100,size=3), 'B': np.random.randint(1,100,size=3)})
df2 = pd.DataFrame({'C': np.random.randint(1,100,size=3), 'D': np.random.randint(1,100,size=3)})
print('DataFrame 1:')
print(df1)
print('\nDataFrame 2:')
print(df2)

## Concatenate the DataFrames along the rows and along the columns.
row_concat = pd.concat([df1, df2], axis=0)
print('\nConcatenated DataFrame (rows):')
print(row_concat)

col_concat = pd.concat([df1, df2], axis=1)
print('\nConcatenated DataFrame (columns):')
print(col_concat)

DataFrame 1:
    A   B
0  77  22
1  19  57
2  26  48

DataFrame 2:
    C   D
0  68  46
1  42  95
2   2  56

Concatenated DataFrame (rows):
      A     B     C     D
0  77.0  22.0   NaN   NaN
1  19.0  57.0   NaN   NaN
2  26.0  48.0   NaN   NaN
0   NaN   NaN  68.0  46.0
1   NaN   NaN  42.0  95.0
2   NaN   NaN   2.0  56.0

Concatenated DataFrame (columns):
    A   B   C   D
0  77  22  68  46
1  19  57  42  95
2  26  48   2  56


### Assignment 6: Time Series Analysis

In [9]:
# 1. Create a Pandas DataFrame with a datetime index and one column filled with random integers.
date_rng = pd.date_range(start='2026-01-01', end='2026-12-31', freq='D')
df = pd.DataFrame(date_rng, columns=['date'])
df['data'] = np.random.randint(0, 100, size=(len(date_rng)))
df.set_index('date', inplace=True)
print('DataFrame:')
print(df)

## Resample the DataFrame to compute the monthly mean of the values.
monthly_mean = df.resample('M').mean()
print('\nMonthly Mean:')
print(monthly_mean)

DataFrame:
            data
date            
2026-01-01     2
2026-01-02    38
2026-01-03    42
2026-01-04    98
2026-01-05    28
...          ...
2026-12-27    10
2026-12-28    43
2026-12-29    78
2026-12-30    32
2026-12-31    42

[365 rows x 1 columns]

Monthly Mean:
                 data
date                 
2026-01-31  48.548387
2026-02-28  55.928571
2026-03-31  45.483871
2026-04-30  52.733333
2026-05-31  44.483871
2026-06-30  43.633333
2026-07-31  48.483871
2026-08-31  54.709677
2026-09-30  47.133333
2026-10-31  42.290323
2026-11-30  57.500000
2026-12-31  44.838710


  monthly_mean = df.resample('M').mean()


In [16]:
# 2. Create a Pandas DataFrame with a datetime index ranging from '2021-01-01' to '2021-12-31' and one column filled with random integers.
date_rng = pd.date_range(start='2021-01-01', end='2021-12-31', freq='D')
df = pd.DataFrame(date_rng, columns=['date'])
df['data'] = np.random.randint(0, 100, size=(len(date_rng)))
df.set_index('date', inplace=True)
print('Original DataFrame:')
print(df)
 
## Compute the rolling mean with a window of 7 days.
rolling_mean = df.rolling(window=7).mean()
print('\nRolling mean DataFrame:')
print(rolling_mean)

Original DataFrame:
            data
date            
2021-01-01    86
2021-01-02     4
2021-01-03     5
2021-01-04    94
2021-01-05     7
...          ...
2021-12-27    67
2021-12-28    94
2021-12-29     0
2021-12-30    87
2021-12-31    28

[365 rows x 1 columns]

Rolling mean DataFrame:
                 data
date                 
2021-01-01        NaN
2021-01-02        NaN
2021-01-03        NaN
2021-01-04        NaN
2021-01-05        NaN
...               ...
2021-12-27  52.857143
2021-12-28  60.000000
2021-12-29  55.000000
2021-12-30  54.285714
2021-12-31  53.285714

[365 rows x 1 columns]


### Assignment 7: MultiIndex DataFrame

In [22]:
# 1. Create a Pandas DataFrame with a MultiIndex (hierarchical index).
arr = [['A','A','B','B'], ['one', 'two', 'one', 'two']]
ind = pd.MultiIndex.from_arrays(arr, names=('Category', 'Subcategory'))
df = pd.DataFrame(np.random.randint(0,100,size=(4,3)), index=ind, columns=['Value1', 'Value2', 'Value3'])
print('MultiIndex DataFrame:\n')
print(df)

# Perform some basic indexing and slicing operations on the MultiIndex DataFrame.
print("\nSlicing at Category 'A':")
print(df.loc['A'])

print("\nSlicing at Category 'B' with Subcategory 'two':")
print(df.loc[('B', 'two')])

MultiIndex DataFrame:

                      Value1  Value2  Value3
Category Subcategory                        
A        one              34      52      40
         two              68      68      14
B        one              82      92      19
         two              82      75      39

Slicing at Category 'A':
             Value1  Value2  Value3
Subcategory                        
one              34      52      40
two              68      68      14

Slicing at Category 'B' with Subcategory 'two':
Value1    82
Value2    75
Value3    39
Name: (B, two), dtype: int32


In [27]:
# 2. Create a Pandas DataFrame with MultiIndex consisting of 'Category' and 'SubCategory'. 
arr = [['A','A','B','B','C','C'], ['one','two','one','two','one','two']]
ind = pd.MultiIndex.from_arrays(arr, names=['Category', 'Subcategory'])
df = pd.DataFrame(np.random.randint(1,100,size=(6,3)), index=ind, columns=['Value1', 'Value2', 'Value3'])
print('MultiIndex DataFrame:')
print(df)

## Fill the DataFrame with random data and compute the sum of values for each 'Category' and 'SubCategory'.
sum_values = df.groupby(['Category', 'Subcategory']).sum()
print('\nSum of values:')
print(sum_values)

MultiIndex DataFrame:
                      Value1  Value2  Value3
Category Subcategory                        
A        one              65      63       5
         two              92      96      55
B        one              85      24      19
         two              71      31       6
C        one              42       3      16
         two              56      42      47

Sum of values:
                      Value1  Value2  Value3
Category Subcategory                        
A        one              65      63       5
         two              92      96      55
B        one              85      24      19
         two              71      31       6
C        one              42       3      16
         two              56      42      47


### Assignment 8: Pivot Tables

1. Create a Pandas DataFrame with columns 'Date', 'Category', and 'Value'. Create a pivot table to compute the sum of 'Value' for each 'Category' by 'Date'.
2. Create a Pandas DataFrame with columns 'Year', 'Quarter', and 'Revenue'. Create a pivot table to compute the mean 'Revenue' for each 'Quarter' by 'Year'.

In [None]:
# 1. Create a Pandas DataFrame with columns 'Date', 'Category', and 'Value'.
date_rng = pd.date_range(start='2026-01-01', end='2026-01-10', freq='D')
df = pd.DataFrame({'Date': np.random.choice(date_rng, size=20), 
                   'Category': np.random.choice(['A','B','C'], size=20), 
                   'Value': np.random.randint(1,100,size=20)})
print('DataFrame:')
print(df)

## Create a pivot table to compute the sum of 'Value' for each 'Category' by 'Date'.
pivot_table = df.pivot_table(values="Value", index='Date', columns='Category', aggfunc='sum')
print('\nPivot Table:')
print(pivot_table)

DataFrame:
         Date Category  Value
0  2026-01-09        A     87
1  2026-01-08        A     37
2  2026-01-03        A     28
3  2026-01-06        B     53
4  2026-01-03        A      5
5  2026-01-07        A     70
6  2026-01-04        C     98
7  2026-01-10        B     55
8  2026-01-10        B     94
9  2026-01-06        B     16
10 2026-01-01        B     96
11 2026-01-10        A     45
12 2026-01-03        B     79
13 2026-01-06        A     63
14 2026-01-01        A     83
15 2026-01-02        A     37
16 2026-01-07        C     25
17 2026-01-03        C     56
18 2026-01-01        A     66
19 2026-01-06        B     77

Pivot Table:
Category        A      B     C
Date                          
2026-01-01  149.0   96.0   NaN
2026-01-02   37.0    NaN   NaN
2026-01-03   33.0   79.0  56.0
2026-01-04    NaN    NaN  98.0
2026-01-06   63.0  146.0   NaN
2026-01-07   70.0    NaN  25.0
2026-01-08   37.0    NaN   NaN
2026-01-09   87.0    NaN   NaN
2026-01-10   45.0  149.0   NaN


In [10]:
# 2. Create a Pandas DataFrame with columns 'Year', 'Quarter', and 'Revenue'.
df = pd.DataFrame({'Year': np.random.choice([2024, 2025, 2026], size=12), 'Quarter': np.random.choice(['Q1','Q2','Q3','Q4'], size=12), 'Revenue': np.random.randint(1,1000, size=12)})
print('DataFrame:')
print(df)

## Create a pivot table to compute the mean 'Revenue' for each 'Quarter' by 'Year'.
pivot_table = df.pivot_table(values='Revenue', index='Year', columns='Quarter', aggfunc='mean')
print('\nPivot Table:')
print(pivot_table)

DataFrame:
    Year Quarter  Revenue
0   2024      Q1      276
1   2026      Q1      751
2   2025      Q1      487
3   2024      Q2      231
4   2024      Q3      687
5   2026      Q4      198
6   2025      Q3      947
7   2026      Q1      788
8   2025      Q2      974
9   2024      Q1      550
10  2024      Q3      204
11  2025      Q1       54

Pivot Table:
Quarter     Q1     Q2     Q3     Q4
Year                               
2024     413.0  231.0  445.5    NaN
2025     270.5  974.0  947.0    NaN
2026     769.5    NaN    NaN  198.0


### Assignment 9: Applying Functions

In [11]:
# 1. Create a Pandas DataFrame with 3 columns and 5 rows filled with random integers.
df = pd.DataFrame(np.random.randint(1,100, size=(5,3)))
print('DataFrame:')
print(df)

## Apply a function that doubles the values of the DataFrame.
df_doubled = df.applymap(lambda x:x*2)
print('\nDoubled DataFrame:')
print(df_doubled)

DataFrame:
    0   1   2
0  52  97  34
1  48   1  11
2  59  25  76
3   4   6  45
4  55  84  19

Doubled DataFrame:
     0    1    2
0  104  194   68
1   96    2   22
2  118   50  152
3    8   12   90
4  110  168   38


  df_doubled = df.applymap(lambda x:x*2)


In [12]:
#2. Create a Pandas DataFrame with 3 columns and 6 rows filled with random integers.
df = pd.DataFrame(np.random.randint(1,100,size=(6,3)))
print('DataFrame:')
print(df)

## Apply a lambda function to create a new column that is the sum of the existing columns.
df['Sum'] = df.apply(lambda i:i.sum(), axis=1)
print('\nDataFrame with Sum Column:')
print(df)

DataFrame:
    0   1   2
0  57  50  13
1  63  82  17
2  83  66  78
3  46  85  40
4  24  94  78
5  55  63  90

DataFrame with Sum Column:
    0   1   2  Sum
0  57  50  13  120
1  63  82  17  162
2  83  66  78  227
3  46  85  40  171
4  24  94  78  196
5  55  63  90  208


### Assignment 10: Working with Text Data

In [15]:
# 1. Create a Pandas Series with 5 random text strings.
df = pd.Series(['apple', 'banana', 'cherry', 'date', 'elderberry'])
print('Series : ')
print(df)

## Convert all the strings to uppercase.
upper_df = df.str.upper()
print('\nUpper case :')
print(upper_df)

Series : 
0         apple
1        banana
2        cherry
3          date
4    elderberry
dtype: object

Upper case :
0         APPLE
1        BANANA
2        CHERRY
3          DATE
4    ELDERBERRY
dtype: object


In [17]:
# 2. Create a Pandas Series with 5 random text strings.
df = pd.Series(['apple', 'banana', 'cherry', 'date', 'elderberry'])
print('Series :')
print(df)

## Extract the first three characters of each string.
print('\nSeries of only first 3 charecters :')
print(df.str[:3])

Series :
0         apple
1        banana
2        cherry
3          date
4    elderberry
dtype: object

Series of only first 3 charecters :
0    app
1    ban
2    che
3    dat
4    eld
dtype: object
