# DataFrame

Most DataFrame methods work the same way as their pandas Series counterparts.

### Insert row and column

In [17]:
import numpy as np
import pandas as pd

# Build a 3x5 frame of random integers in [10, 36) to demonstrate insertion.
data = np.random.randint(low=10,high=36,size=15).reshape((3,5))

df = pd.DataFrame(
    data,
    columns=["first","second","third","fourth","fifth"]
)
print(df)

# Row insert: pandas has no positional row insert, so split the frame at
# rowIndex and concatenate the new row in between. ignore_index=True
# renumbers the result 0..n-1.
# The new row's width is taken from the frame rather than hard-coded, so it
# stays valid if the column list above changes.
row = [np.random.randint(low=10,high=36,size=df.shape[1])]
rowIndex=1
df = pd.concat([
    df.iloc[:rowIndex],
    pd.DataFrame(row,columns=df.columns),
    df.iloc[rowIndex:]
],ignore_index=True)
print(f"\nInserted a new row at index {rowIndex} :\n{df}")

# Column insert: DataFrame.insert modifies df in place and requires exactly
# one value per row, so the length is derived from the current frame (which
# already contains the inserted row) instead of a literal.
col = np.random.randint(low=10,high=36,size=len(df))
colIndex=3
df.insert(colIndex,"new_col",col)
print(f"\nInserted a new col at index {colIndex} :\n{df}")


   first  second  third  fourth  fifth
0     25      31     31      35     15
1     22      35     21      29     14
2     32      35     31      15     21

Inserted a new row at index 1 :
   first  second  third  fourth  fifth
0     25      31     31      35     15
1     33      13     28      13     32
2     22      35     21      29     14
3     32      35     31      15     21

Inserted a new col at index 3 :
   first  second  third  new_col  fourth  fifth
0     25      31     31       19      35     15
1     33      13     28       28      13     32
2     22      35     21       32      29     14
3     32      35     31       15      15     21


### Grouping

In [30]:
import pandas as pd

# Small sample frame: three of the five people share the same age (21),
# so grouping by age collapses them into one row.
df = pd.DataFrame(dict(
    names=["first","second","third","fourth","fifth"],
    age=[20,21,21,21,32],
    salary=[20000,32450,64000,25000,30000],
))
print(df)

# Group on "age" and sum every remaining column.
# NOTE: sum() is applied to the string "names" column as well, which
# concatenates the names within each group (see the recorded output);
# only the "salary" totals are numerically meaningful.
ageGroups = df.groupby(by=["age"])
groupAges = ageGroups.sum()
print(f"\nGrouped by age :\n{groupAges}")


    names  age  salary
0   first   20   20000
1  second   21   32450
2   third   21   64000
3  fourth   21   25000
4   fifth   32   30000

Grouped by age :
                 names  salary
age                           
20               first   20000
21   secondthirdfourth  121450
32               fifth   30000


### Merging and renaming

In [17]:
import pandas as pd

# Customer lookup table, keyed by "id".
customers = pd.DataFrame(dict(
    id=[1,2,3,4,5],
    name=["Ranit","Neha","Shouvik","Satadru","Ranadip"],
))

# Orders reference a customer through "customerId".
orders = pd.DataFrame(dict(
    customerId=[2,3,1],
    price=[150,3300,730],
))

print(f"Customer details :\n{customers}")
print(f"\nOrder details :\n{orders}")

# Inner join keeps only customers that have at least one order.
# The key columns have different names, so both sides are named explicitly;
# the redundant right-hand key is dropped afterwards and the remaining
# columns get display-friendly labels.
mergedInner = (
    customers
    .merge(orders, how="inner", left_on="id", right_on="customerId")
    .drop(columns="customerId")
    .rename(columns={"name": "customer name", "price": "purchase cost"})
)
print(f"\nInner merged:\n{mergedInner}")

Customer details :
   id     name
0   1    Ranit
1   2     Neha
2   3  Shouvik
3   4  Satadru
4   5  Ranadip

Order details :
   customerId  price
0           2    150
1           3   3300
2           1    730

Inner merged:
   id customer name  purchase cost
0   1         Ranit            730
1   2          Neha            150
2   3       Shouvik           3300


### More on handling missing data

In [34]:
import pandas as pd

# Sample frame with gaps: None becomes NaN in the numeric columns.
df = pd.DataFrame(dict(
    names=["first","second","third","fourth","fifth"],
    age=[None,36,None,20,32],
    salary=[20000,None,64000,25000,30000],
))

print(df)

# interpolation
# Fill gaps column-wise (axis=0) with straight-line estimates between the
# neighbouring known values. limit_direction="both" also fills a gap at the
# very start or end of a column, where there is only one neighbour.
numericCols = ["age", "salary"]
filled = df[numericCols].interpolate(
    method="linear",
    axis=0,
    limit_direction="both",
)
df[numericCols] = filled

# Passing df as a second argument makes print() join it to the label with a
# single space, which is why the header row of the printed frame is shifted
# one column to the right in the output.
print("\nAfter interpolation :\n",df)

    names   age   salary
0   first   NaN  20000.0
1  second  36.0      NaN
2   third   NaN  64000.0
3  fourth  20.0  25000.0
4   fifth  32.0  30000.0

After interpolation :
     names   age   salary
0   first  36.0  20000.0
1  second  36.0  42000.0
2   third  28.0  64000.0
3  fourth  20.0  25000.0
4   fifth  32.0  30000.0
