In [34]:
import pandas as pd
import numpy as np

# Column-oriented input: every key becomes a DataFrame column and
# np.nan marks an entry that is missing.
data = {
    'DM': [86, 77, 75, 78, 82, 95],
    'DSA': [np.nan, 80, np.nan, 68, np.nan, 95],
    'FDS': [96, 70, 65, 88, 72, 78],
    'ADEC': [76, np.nan, 75, np.nan, 82, 87],
    'CEP': [66, 80, 75, np.nan, 92, 95],
    'M2': [50, np.nan, 85, np.nan, 52, 77],
}

# Build the frame; rows get the default 0..5 integer index.
df = pd.DataFrame(data)

# Show the dataset exactly as constructed (heading line, then the frame).
print("Original dataset : ", df, sep="\n")

Original dataset : 
   DM   DSA  FDS  ADEC   CEP    M2
0  86   NaN   96  76.0  66.0  50.0
1  77  80.0   70   NaN  80.0   NaN
2  75   NaN   65  75.0  75.0  85.0
3  78  68.0   88   NaN   NaN   NaN
4  82   NaN   72  82.0  92.0  52.0
5  95  95.0   78  87.0  95.0  77.0


In [38]:
# Create an exact duplicate inside the frame: take the row labelled 5 and
# write it over the row labelled 0.
# Note: this mutates `df` in place, so the original values of row 0 are lost.
row_to_copy = df.loc[5]
df.loc[0] = row_to_copy
print("After duplicating rows: ", df, sep="\n")

After duplicating rows: 
   DM   DSA  FDS  ADEC   CEP    M2
0  95  95.0   78  87.0  95.0  77.0
1  77  80.0   70   NaN  80.0   NaN
2  75   NaN   65  75.0  75.0  85.0
3  78  68.0   88   NaN   NaN   NaN
4  82   NaN   72  82.0  92.0  52.0
5  95  95.0   78  87.0  95.0  77.0


In [21]:
# Count the NaN entries per column (isna is the same check as isnull).
missing_per_column = df.isna().sum()
print("\n Missing values in each column : ", missing_per_column, sep="\n")


 Missing values in each column : 
DM      0
DSA     2
FDS     0
ADEC    2
CEP     1
M2      2
dtype: int64


In [22]:
# Show the rows that repeat an earlier row; the first occurrence of each
# repeated row is not flagged, only the later copies are.
is_repeat = df.duplicated(keep="first")
print("\n Duplicate Rows: ", df[is_repeat], sep="\n")


 Duplicate Rows: 
   DM   DSA  FDS  ADEC   CEP    M2
5  95  95.0   78  87.0  95.0  77.0


In [23]:
# Keep the first occurrence of every row and discard later exact copies.
# A new frame is returned; `df` itself is left unchanged.
df_no_duplicates = df.drop_duplicates(keep="first")
print("\n Dataset after removing Duplicates: ", df_no_duplicates, sep="\n")


 Dataset after removing Duplicates: 
   DM   DSA  FDS  ADEC   CEP    M2
0  95  95.0   78  87.0  95.0  77.0
1  77  80.0   70   NaN  80.0   NaN
2  75   NaN   65  75.0  75.0  85.0
3  78  68.0   88   NaN   NaN   NaN
4  82   NaN   72  82.0  92.0  52.0


In [24]:
# Row-wise deletion: discard every row that has at least one NaN.
df_dropna_rows = df_no_duplicates.dropna(axis=0, how="any")
print("\n Dataset after dropping rows with Missing values : ", df_dropna_rows, sep="\n")


 Dataset after dropping rows with Missing values : 
   DM   DSA  FDS  ADEC   CEP    M2
0  95  95.0   78  87.0  95.0  77.0


In [26]:
# Column-wise deletion: discard every column that has at least one NaN.
df_dropna_columns = df_no_duplicates.dropna(axis="columns", how="any")
print("\n Dataset after dropping columns with Missing values : ", df_dropna_columns, sep="\n")


 Dataset after dropping columns with Missing values : 
   DM  FDS
0  95   78
1  77   70
2  75   65
3  78   88
4  82   72


In [40]:
# Per-column mean (NaN entries are skipped by default).
# Note: this is computed on `df`, which still contains the duplicated row,
# so it can differ from the means of `df_no_duplicates`.
mean = df.mean(axis=0)
print("\n Mean of each column : ", mean, sep="\n")


 Mean of each column : 
DM      83.666667
DSA     84.500000
FDS     75.166667
ADEC    82.750000
CEP     87.400000
M2      72.750000
dtype: float64


In [43]:
# Per-column minimum (NaN entries are skipped by default).
# Note: this is computed on `df`, which still contains the duplicated row.
min_value = df.min(axis=0)
print("\n Minimum of each column : ", min_value, sep="\n")


 Minimum of each column : 
DM      75.0
DSA     68.0
FDS     65.0
ADEC    75.0
CEP     75.0
M2      52.0
dtype: float64


In [44]:
# Per-column maximum (NaN entries are skipped by default).
# Note: this is computed on `df`, which still contains the duplicated row.
max_value = df.max(axis=0)
print("\n Maximum of each column : ", max_value, sep="\n")


 Maximum of each column : 
DM      95.0
DSA     95.0
FDS     88.0
ADEC    87.0
CEP     95.0
M2      85.0
dtype: float64


In [45]:
# Mean imputation: replace each NaN with the mean of its own column.
# The means come from the de-duplicated frame, so the repeated row does not
# weigh in twice. Passing the Series of column means to DataFrame.fillna
# fills every column that has gaps in one call, instead of repeating the
# same statement for a hard-coded list of columns; columns without NaN are
# left untouched. fillna returns a new frame, so `df_no_duplicates` is not
# modified.
df_mean_filled = df_no_duplicates.fillna(df_no_duplicates.mean(numeric_only=True))

# NOTE(review): the banner below mentions "mode for categorical", but this
# cell only performs mean imputation (and "filed" is presumably "filled").
# The text is kept unchanged so it matches the recorded output.
print("\n Missing values filed for mean (and mode for categorical) : ")
print(df_mean_filled)


 Missing values filed for mean (and mode for categorical) : 
   DM   DSA  FDS       ADEC   CEP         M2
0  95  95.0   78  87.000000  95.0  77.000000
1  77  80.0   70  81.333333  80.0  71.333333
2  75  81.0   65  75.000000  75.0  85.000000
3  78  68.0   88  81.333333  85.5  71.333333
4  82  81.0   72  82.000000  92.0  52.000000


In [51]:
# Max imputation: replace each NaN with the largest observed value of its
# own column, taken from the de-duplicated frame. Passing the Series of
# column maxima to DataFrame.fillna fills every column that has gaps in one
# call, instead of repeating the same statement for a hard-coded list of
# columns; columns without NaN are left untouched. fillna returns a new
# frame, so `df_no_duplicates` is not modified.
df_max_filled = df_no_duplicates.fillna(df_no_duplicates.max(numeric_only=True))
print("\n Missing values filed for max : ")
print(df_max_filled)


 Missing values filed for max : 
   DM   DSA  FDS  ADEC   CEP    M2
0  95  95.0   78  87.0  95.0  77.0
1  77  80.0   70  87.0  80.0  85.0
2  75  95.0   65  75.0  75.0  85.0
3  78  68.0   88  87.0  95.0  85.0
4  82  95.0   72  82.0  92.0  52.0


In [52]:
# Min imputation: replace each NaN with the smallest observed value of its
# own column, taken from the de-duplicated frame. Passing the Series of
# column minima to DataFrame.fillna fills every column that has gaps in one
# call, instead of repeating the same statement for a hard-coded list of
# columns; columns without NaN are left untouched. fillna returns a new
# frame, so `df_no_duplicates` is not modified.
df_min_filled = df_no_duplicates.fillna(df_no_duplicates.min(numeric_only=True))
print("\n Missing values filed for min : ")
print(df_min_filled)


 Missing values filed for min : 
   DM   DSA  FDS  ADEC   CEP    M2
0  95  95.0   78  87.0  95.0  77.0
1  77  80.0   70  75.0  80.0  52.0
2  75  68.0   65  75.0  75.0  85.0
3  78  68.0   88  75.0  75.0  52.0
4  82  68.0   72  82.0  92.0  52.0
