In [316]:
import numpy as np
import pandas as pd

# Exercice 1 : Creating and Modifying Series
* Create a Pandas Series from a dictionary where keys are ['a', 'b', 'c'] and values are [100, 200, 300].

In [317]:
data = {'a': 100, 'b': 200, 'c': 300}

series = pd.Series(data)

print(series)

a    100
b    200
c    300
dtype: int64


# Exercice 2 : Creating DataFrames
Create a DataFrame from the following data:

In [318]:

df = pd.DataFrame({
    "A": [1, 4, 7],
    "B": [2, 5, 8],
    "C": [3, 6, 9]
})

df

Unnamed: 0,A,B,C
0,1,2,3
1,4,5,6
2,7,8,9


Modify the code to add a new column D with values [10, 11, 12].

Drop column B from the DataFrame and display the result.

In [319]:
df['D'] = [10, 11, 12]

df

Unnamed: 0,A,B,C,D
0,1,2,3,10
1,4,5,6,11
2,7,8,9,12


In [320]:
df.drop('B', axis=1, inplace=True)

df

Unnamed: 0,A,C,D
0,1,3,10
1,4,6,11
2,7,9,12


# Exercice 3 : DataFrame Indexing and Selection
Select column B from the following DataFrame:

In [321]:
df = pd.DataFrame({
    "A": [1, 4, 7],
    "B": [2, 5, 8],
    "C": [3, 6, 9]
})

df['B']

0    2
1    5
2    8
Name: B, dtype: int64

Modify the code to select both columns A and C.

Select the row with index 1 using the .loc method.

In [322]:
df_ac = df[['A', 'C']].loc[1]

df_ac

A    4
C    6
Name: 1, dtype: int64

# Exercice 4 : Adding and Removing DataFrame Elements
Add a new column Sum to the DataFrame which is the sum of columns A, B, and C.


In [323]:
df['Sum'] = df['A'] + df['B'] + df['C']

df

Unnamed: 0,A,B,C,Sum
0,1,2,3,6
1,4,5,6,15
2,7,8,9,24


Remove the column Sum from the DataFrame.


In [324]:
df.drop("Sum", axis=1, inplace=True)

df

Unnamed: 0,A,B,C
0,1,2,3
1,4,5,6
2,7,8,9


Add a column Random with random numbers generated using numpy.

In [325]:
df['Random'] = np.random.randint(1, 10, size=3)

df

Unnamed: 0,A,B,C,Random
0,1,2,3,5
1,4,5,6,6
2,7,8,9,7


# Exercice 5 : Merging DataFrames
Merge the following two DataFrames on the key column:

In [326]:
left = pd.DataFrame({
    "key": [1, 2, 3],
    "A": ["A1", "A2", "A3"],
    "B": ["B1", "B2", "B3"]
})

right = pd.DataFrame({
    "key": [1, 2, 3],
    "C": ['C1', 'C2', 'C3'],
    "D": ['D1', 'D2', 'D3']
})

print(left)
print(right)

   key   A   B
0    1  A1  B1
1    2  A2  B2
2    3  A3  B3
   key   C   D
0    1  C1  D1
1    2  C2  D2
2    3  C3  D3


In [327]:
pd.merge(left, right, on='key')

Unnamed: 0,key,A,B,C,D
0,1,A1,B1,C1,D1
1,2,A2,B2,C2,D2
2,3,A3,B3,C3,D3



Modify the merge to use an outer join instead of an inner join.


In [328]:
pd.merge(left, right, on='key', how='outer')

Unnamed: 0,key,A,B,C,D
0,1,A1,B1,C1,D1
1,2,A2,B2,C2,D2
2,3,A3,B3,C3,D3


Add a new column E to the right DataFrame and update the merge to include this new column.



In [329]:
right['E'] = ["E1", "E2", "E3"]
pd.merge(left, right, on='key', how='outer')

Unnamed: 0,key,A,B,C,D,E
0,1,A1,B1,C1,D1,E1
1,2,A2,B2,C2,D2,E2
2,3,A3,B3,C3,D3,E3


# Exercice 6 : Data Cleaning
Replace all NaN values in the following DataFrame with the value 0:

In [330]:
df = pd.DataFrame({
    "A": [1.0, np.NaN, 3.0],
    "B": [np.NaN, 5.0, 6.0],
    "C": [7.0, 8.0,np.NaN]
})
df.fillna(0, inplace=True)

df

Unnamed: 0,A,B,C
0,1.0,0.0,7.0
1,0.0,5.0,8.0
2,3.0,6.0,0.0


Modify the code to replace NaN values with the mean of the column.

In [331]:
df.fillna(df.mean(), inplace=True)

df


Unnamed: 0,A,B,C
0,1.0,0.0,7.0
1,0.0,5.0,8.0
2,3.0,6.0,0.0


Drop rows where any value is NaN.



In [332]:
df = pd.DataFrame({
    "A": [1.0, np.NaN, 3.0],
    "B": [np.NaN, 5.0, 6.0],
    "C": [7.0, 8.0,np.NaN]
})


df = df.dropna()

df

Unnamed: 0,A,B,C


# Exercice 7 : Grouping and Aggregation
Group the following DataFrame by column Category and calculate the mean of column Value:

In [333]:
df = pd.DataFrame({
    "Category": ["A", "B", "A", "B", "A", "B"],
    "Value": [1, 2, 3, 4, 5, 6]
})

df


Unnamed: 0,Category,Value
0,A,1
1,B,2
2,A,3
3,B,4
4,A,5
5,B,6


In [334]:

mean = df.groupby("Category")["Value"].mean().reset_index()
mean

Unnamed: 0,Category,Value
0,A,3.0
1,B,4.0


Modify the code to calculate the sum instead of the mean.

In [335]:
df.groupby("Category")["Value"].sum().reset_index()



Unnamed: 0,Category,Value
0,A,9
1,B,12


Add margins to the pivot table to show the total mean for each Category and Type.



In [336]:


df.groupby("Category").size().reset_index(name='Count')


Unnamed: 0,Category,Count
0,A,3
1,B,3


# Exercice 8 : Pivot Tables
Create a pivot table from the following DataFrame, showing the mean Value for each Category and Type:

In [337]:
df = pd.DataFrame({
    "Category": ["A", "A", "A", "B", "B",  "B"],
    "Type": ["X", "Y", "X", "Y", "X",  "Y"],
    "Value": [1, 2, 3, 4, 5, 6]
})

df

Unnamed: 0,Category,Type,Value
0,A,X,1
1,A,Y,2
2,A,X,3
3,B,Y,4
4,B,X,5
5,B,Y,6


In [338]:
df.groupby("Category")["Value"].mean().reset_index()


Unnamed: 0,Category,Value
0,A,2.0
1,B,5.0


Modify the pivot table to show the sum of Value instead of the mean.


In [339]:
df.groupby("Category")["Value"].sum().reset_index()


Unnamed: 0,Category,Value
0,A,6
1,B,15


Add margins to the pivot table to show the total mean for each Category and Type.



In [340]:
pd.pivot_table(df, values="Value", index="Category", columns="Type", aggfunc="mean", margins=True)

Type,X,Y,All
Category,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
A,2.0,2.0,2.0
B,5.0,5.0,5.0
All,3.0,4.0,3.5


# Exercice 9 : Time Series Data
Create a time series DataFrame with a date range starting from '2023-01-01' for 6 periods and random values.



In [341]:
df = pd.date_range(start='2023-01-01', periods=6, freq='D')

df = pd.DataFrame({
    'Date': df,
    'Value': np.random.randint(1, 100, size=6)
})


Set the date column as the index of the DataFrame.



In [342]:
df.set_index('Date', inplace=True)

df


Unnamed: 0_level_0,Value
Date,Unnamed: 1_level_1
2023-01-01,34
2023-01-02,3
2023-01-03,54
2023-01-04,38
2023-01-05,48
2023-01-06,52


Resample the data to calculate the sum for each 2-day period.


In [343]:
resampled_df = df.resample('2D').sum()

print(resampled_df)

            Value
Date             
2023-01-01     37
2023-01-03     92
2023-01-05    100


# Exercice 10 : Handling Missing Data
Interpolate missing values in the following DataFrame:

In [344]:
df = pd.DataFrame({
    "A": [1.0, 2.0, np.NaN],
    "B": [ np.NaN, 5.0, 8.0],
    "C": [3.0, np.NaN, 9.0]
})

df

Unnamed: 0,A,B,C
0,1.0,,3.0
1,2.0,5.0,
2,,8.0,9.0


In [345]:
df_inter = df.interpolate()

df_inter

Unnamed: 0,A,B,C
0,1.0,,3.0
1,2.0,5.0,6.0
2,2.0,8.0,9.0


Drop rows with any NaN values instead of interpolating.



In [346]:
df_nan = df.dropna()

df_nan

Unnamed: 0,A,B,C


# Exercice 11 : DataFrame Operations
Calculate the cumulative sum of the following DataFrame:

In [347]:
df = pd.DataFrame({
    "A": [1, 4, 7],
    "B": [2, 5, 8],
    "C": [3, 6, 9]
})

df



Unnamed: 0,A,B,C
0,1,2,3
1,4,5,6
2,7,8,9


In [348]:
df_sum = df.cumsum()
df_sum

Unnamed: 0,A,B,C
0,1,2,3
1,5,7,9
2,12,15,18


Calculate the cumulative product of the DataFrame.


In [349]:
df_prod  = df.cumprod()

df_prod

Unnamed: 0,A,B,C
0,1,2,3
1,4,10,18
2,28,80,162



Apply a function to subtract 1 from all elements in the DataFrame.

In [350]:
df.applymap(lambda x: x - 1)


  df.applymap(lambda x: x - 1)


Unnamed: 0,A,B,C
0,0,1,2
1,3,4,5
2,6,7,8
