In [1]:
import pandas as pd

In [2]:
# Temperature readings, indexed by day label.
df_A = pd.DataFrame(
    data={'Temp': [20, 30, 40, 50]},
    index=['day 1', 'day 2', 'day 3', 'day 4'],
)
# Humidity readings; only 'day 1' and 'day 2' also appear in df_A's index.
df_B = pd.DataFrame(
    data={'Humidity': [120, 130, 140, 150]},
    index=['day 1', 'day 2', 'day 5', 'day 7'],
)

In [3]:
# Left join on the index: every row of df_A is kept, and days that have no
# matching label in df_B get NaN in the 'Humidity' column.
joined_df = df_A.join(other=df_B, how='left')
print(joined_df)

       Temp  Humidity
day 1    20     120.0
day 2    30     130.0
day 3    40       NaN
day 4    50       NaN


In [4]:
# melt() turns columns into rows. This wide table, with one column per
# subject, is the input used to demonstrate it.
wide_df = pd.DataFrame(
    data={
        'Name': ['Ram', 'Shyam', 'Hari', 'Sita'],
        'Maths': [10, 20, 30, 50],
        'Science': [30, 35, 34, 46],
        'English': [12, 25, 32, 12],
    }
)
print(wide_df)

    Name  Maths  Science  English
0    Ram     10       30       12
1  Shyam     20       35       25
2   Hari     30       34       32
3   Sita     50       46       12


In [5]:
# Reshape wide -> long: 'Name' stays as the identifier column, while every
# other column becomes a (Subject, Marks) pair on its own row.
long_df = wide_df.melt(
    id_vars=['Name'],
    var_name='Subject',
    value_name='Marks',
)
print(long_df)

     Name  Subject  Marks
0     Ram    Maths     10
1   Shyam    Maths     20
2    Hari    Maths     30
3    Sita    Maths     50
4     Ram  Science     30
5   Shyam  Science     35
6    Hari  Science     34
7    Sita  Science     46
8     Ram  English     12
9   Shyam  English     25
10   Hari  English     32
11   Sita  English     12


In [6]:
# pivot() converts the long table back into a wide one: one row per Name,
# one column per Subject, with the cells holding Marks.
feri_wide_df = pd.pivot(
    long_df,
    index='Name',
    columns='Subject',
    values='Marks',
)
print(feri_wide_df)

Subject  English  Maths  Science
Name                            
Hari          32     30       34
Ram           12     10       30
Shyam         25     20       35
Sita          12     50       46


In [7]:
# stack() moves the column labels into an inner index level, so each
# (row, column) cell of long_df becomes one entry of the result.
stacked_df = long_df.stack(level=-1)
print(stacked_df)

0   Name           Ram
    Subject      Maths
    Marks           10
1   Name         Shyam
    Subject      Maths
    Marks           20
2   Name          Hari
    Subject      Maths
    Marks           30
3   Name          Sita
    Subject      Maths
    Marks           50
4   Name           Ram
    Subject    Science
    Marks           30
5   Name         Shyam
    Subject    Science
    Marks           35
6   Name          Hari
    Subject    Science
    Marks           34
7   Name          Sita
    Subject    Science
    Marks           46
8   Name           Ram
    Subject    English
    Marks           12
9   Name         Shyam
    Subject    English
    Marks           25
10  Name          Hari
    Subject    English
    Marks           32
11  Name          Sita
    Subject    English
    Marks           12
dtype: object


In [8]:
# unstack() reverses the stack: the innermost index level is spread back
# out into columns.
unstacked_df = stacked_df.unstack(level=-1)
print(unstacked_df)

     Name  Subject Marks
0     Ram    Maths    10
1   Shyam    Maths    20
2    Hari    Maths    30
3    Sita    Maths    50
4     Ram  Science    30
5   Shyam  Science    35
6    Hari  Science    34
7    Sita  Science    46
8     Ram  English    12
9   Shyam  English    25
10   Hari  English    32
11   Sita  English    12


In [9]:
# pivot_table() reshapes like pivot() but aggregates the values that fall
# into each (Name, Subject) cell -- here with the median.
df_pivot_table = long_df.pivot_table(
    index='Name',
    columns='Subject',
    values='Marks',
    aggfunc='median',
)
print(df_pivot_table)

Subject  English  Maths  Science
Name                            
Hari        32.0   30.0     34.0
Ram         12.0   10.0     30.0
Shyam       25.0   20.0     35.0
Sita        12.0   50.0     46.0


In [10]:
# Load the sample records from the CSV file sitting next to the notebook.
df = pd.read_csv(filepath_or_buffer="test.csv")
print(df)

   Roll   Name   Age     Address
0   1.0    Ram  22.0      Dharan
1   2.0  Shyam   NaN  Biratnagar
2   NaN   Hari  20.0      Damank
3   4.0   Sita   NaN    Janakpur
4   5.0  Manav  19.0      Dharan


In [11]:
# Boolean mask with the same shape as df: True wherever a value is missing.
missing_mask = df.isna()
print(missing_mask)

    Roll   Name    Age  Address
0  False  False  False    False
1  False  False   True    False
2   True  False  False    False
3  False  False   True    False
4  False  False  False    False


In [12]:
# Count the missing values in each column.
missing_per_column = df.isna().sum(axis=0)
print(missing_per_column)

Roll       1
Name       0
Age        2
Address    0
dtype: int64


In [13]:
# Rows or columns that contain missing values can be dropped.
# Store the result under a separate name so the original df stays available.

# Drop every row that has at least one missing value.
row_Drop_df = df.dropna(axis='index')
row_Drop_df.head()

Unnamed: 0,Roll,Name,Age,Address
0,1.0,Ram,22.0,Dharan
4,5.0,Manav,19.0,Dharan


In [14]:
# Drop every column that has at least one missing value.
column_drop_df = df.dropna(axis='columns')
column_drop_df.head()

Unnamed: 0,Name,Address
0,Ram,Dharan
1,Shyam,Biratnagar
2,Hari,Damank
3,Sita,Janakpur
4,Manav,Dharan


In [15]:
# Missing values can also be filled in with fillna() instead of being dropped.
# The gaps in 'Age' are replaced by the mean of the known ages; note that this
# overwrites the 'Age' column of df itself.
mean_age = df['Age'].mean()
df['Age'] = df['Age'].fillna(value=mean_age)
df.head()

Unnamed: 0,Roll,Name,Age,Address
0,1.0,Ram,22.0,Dharan
1,2.0,Shyam,20.333333,Biratnagar
2,,Hari,20.0,Damank
3,4.0,Sita,20.333333,Janakpur
4,5.0,Manav,19.0,Dharan
