## **Pandas Advanced** - 3

In [1]:
import pandas as pd
import numpy as np

In [2]:
# Load the Titanic passenger CSV straight from GitHub (this cell needs network access).
url = "https://raw.githubusercontent.com/datasciencedojo/datasets/refs/heads/master/titanic.csv"
df = pd.read_csv(url)
# A bare expression on the last line renders the DataFrame as the cell output.
df

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.2500,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.9250,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1000,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.0500,,S
...,...,...,...,...,...,...,...,...,...,...,...,...
886,887,0,2,"Montvila, Rev. Juozas",male,27.0,0,0,211536,13.0000,,S
887,888,1,1,"Graham, Miss. Margaret Edith",female,19.0,0,0,112053,30.0000,B42,S
888,889,0,3,"Johnston, Miss. Catherine Helen ""Carrie""",female,,1,2,W./C. 6607,23.4500,,S
889,890,1,1,"Behr, Mr. Karl Howell",male,26.0,0,0,111369,30.0000,C148,C


#### Concatenating DataFrames

In [8]:
# First five passengers (row positions 0-4), limited to three descriptive columns.
df1 = df[['Name', 'Sex', 'Age']].head(5)
df1

Unnamed: 0,Name,Sex,Age
0,"Braund, Mr. Owen Harris",male,22.0
1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0
2,"Heikkinen, Miss. Laina",female,26.0
3,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0
4,"Allen, Mr. William Henry",male,35.0


In [9]:
# Next five passengers (row positions 5-9), same three columns as df1.
df2 = df[['Name', 'Sex', 'Age']].iloc[5:10]
df2

Unnamed: 0,Name,Sex,Age
5,"Moran, Mr. James",male,
6,"McCarthy, Mr. Timothy J",male,54.0
7,"Palsson, Master. Gosta Leonard",male,2.0
8,"Johnson, Mrs. Oscar W (Elisabeth Vilhelmina Berg)",female,27.0
9,"Nasser, Mrs. Nicholas (Adele Achem)",female,14.0


## Concatenation in Pandas

### String Concatenation
```python
"pw" + "skills"  # Output: "pwskills"
```

### DataFrame Concatenation
Concatenation means placing two (or more) DataFrames next to each other, either vertically (stacking rows) or horizontally (side by side as columns).
#### Vertical Concatenation (Rows, `axis=0`)
Stacks DataFrames **row-wise** (default behavior).
```python
pd.concat([df1, df2])
```
- Keeps all columns, filling missing values with `NaN` if needed.

#### Horizontal Concatenation (Columns, `axis=1`)
Joins DataFrames **column-wise**, aligning by index.
```python
pd.concat([df1, df2], axis=1)
```
- If indexes don’t match, fills gaps with `NaN`.

### Example:
```python
import pandas as pd

df1 = pd.DataFrame({'A': [1, 2]})
df2 = pd.DataFrame({'B': [3, 4]})

# Vertical Concatenation
print(pd.concat([df1, df2], axis=0))  

# Horizontal Concatenation
print(pd.concat([df1, df2], axis=1))  
```

### Output:
#### Vertical Concatenation:
```
     A    B
0  1.0  NaN
1  2.0  NaN
0  NaN  3.0
1  NaN  4.0
```
#### Horizontal Concatenation:
```
   A  B
0  1  3
1  2  4
```



In [25]:
pd.concat([df1, df2])
# Same as pd.concat([df1, df2], axis=0): vertical concatenation.
# pd.concat joins pandas objects along one axis; the default, axis=0,
# stacks the frames row-wise, one below the other.
# Each frame keeps its own index labels; pass ignore_index=True to renumber the result from 0.

Unnamed: 0,Name,Sex,Age
0,"Braund, Mr. Owen Harris",male,22.0
1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0
2,"Heikkinen, Miss. Laina",female,26.0
3,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0
4,"Allen, Mr. William Henry",male,35.0
0,"Moran, Mr. James",male,
1,"McCarthy, Mr. Timothy J",male,54.0
2,"Palsson, Master. Gosta Leonard",male,2.0
3,"Johnson, Mrs. Oscar W (Elisabeth Vilhelmina Berg)",female,27.0
4,"Nasser, Mrs. Nicholas (Adele Achem)",female,14.0


In [None]:
pd.concat([df1, df2], axis=1)
# Horizontal concatenation.
# Rows are aligned on their index labels; where a label exists in only one
# frame, the other frame's columns are filled with NaN for that row.

Unnamed: 0,Name,Sex,Age,Name.1,Sex.1,Age.1
0,"Braund, Mr. Owen Harris",male,22.0,,,
1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,,,
2,"Heikkinen, Miss. Laina",female,26.0,,,
3,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,,,
4,"Allen, Mr. William Henry",male,35.0,,,
5,,,,"Moran, Mr. James",male,
6,,,,"McCarthy, Mr. Timothy J",male,54.0
7,,,,"Palsson, Master. Gosta Leonard",male,2.0
8,,,,"Johnson, Mrs. Oscar W (Elisabeth Vilhelmina Berg)",female,27.0
9,,,,"Nasser, Mrs. Nicholas (Adele Achem)",female,14.0


In [20]:
# Drop the old labels (5-9) and renumber the rows from 0 so df2 lines up with df1.
# The result is reassigned rather than mutated with inplace=True, so the
# statement reads as a plain "new value from old value" step.
df2 = df2.reset_index(drop=True)
df2

Unnamed: 0,Name,Sex,Age
0,"Moran, Mr. James",male,
1,"McCarthy, Mr. Timothy J",male,54.0
2,"Palsson, Master. Gosta Leonard",male,2.0
3,"Johnson, Mrs. Oscar W (Elisabeth Vilhelmina Berg)",female,27.0
4,"Nasser, Mrs. Nicholas (Adele Achem)",female,14.0


In [21]:
# df1 is already labelled 0-4, so this leaves it unchanged; it mirrors the df2 reset.
# The result is reassigned rather than mutated with inplace=True.
df1 = df1.reset_index(drop=True)
df1

Unnamed: 0,Name,Sex,Age
0,"Braund, Mr. Owen Harris",male,22.0
1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0
2,"Heikkinen, Miss. Laina",female,26.0
3,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0
4,"Allen, Mr. William Henry",male,35.0


In [24]:
# Both frames are now labelled 0-4, so every row finds a partner and no NaN padding rows appear.
pd.concat([df1, df2], axis=1)

Unnamed: 0,Name,Sex,Age,Name.1,Sex.1,Age.1
0,"Braund, Mr. Owen Harris",male,22.0,"Moran, Mr. James",male,
1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,"McCarthy, Mr. Timothy J",male,54.0
2,"Heikkinen, Miss. Laina",female,26.0,"Palsson, Master. Gosta Leonard",male,2.0
3,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,"Johnson, Mrs. Oscar W (Elisabeth Vilhelmina Berg)",female,27.0
4,"Allen, Mr. William Henry",male,35.0,"Nasser, Mrs. Nicholas (Adele Achem)",female,14.0


## Merging vs. Joining in Pandas

Merging and joining are ways to combine two DataFrames based on a common column or index, similar to SQL joins. While `merge()` provides more flexibility and control, `join()` is mainly used for quick index-based operations.

### Key Differences Between `merge()` and `join()`
- **`merge()`**: Works on columns, supports different join types, and provides fine control over how DataFrames are combined.
- **`join()`**: Works on indexes by default and is often used for fast lookups and merging based on index alignment.

---

## Types of Merging in Pandas

### **1. Inner Merge (Intersection)**
Returns only the matching rows from both DataFrames based on a common column.
```python
import pandas as pd

df1 = pd.DataFrame({'ID': [1, 2, 3], 'Name': ['Alice', 'Bob', 'Charlie']})
df2 = pd.DataFrame({'ID': [2, 3, 4], 'Score': [85, 90, 95]})

# Inner Merge
print(pd.merge(df1, df2, on='ID', how='inner'))
```
#### Output:
```
   ID     Name  Score
0   2      Bob     85
1   3  Charlie     90
```

### **2. Left Merge**
Keeps all rows from the **left DataFrame** and only matching rows from the right DataFrame.
```python
print(pd.merge(df1, df2, on='ID', how='left'))
```
#### Output:
```
   ID     Name  Score
0   1    Alice    NaN
1   2      Bob   85.0
2   3  Charlie   90.0
```

### **3. Right Merge**
Keeps all rows from the **right DataFrame** and only matching rows from the left DataFrame.
```python
print(pd.merge(df1, df2, on='ID', how='right'))
```
#### Output:
```
   ID     Name  Score
0   2      Bob     85
1   3  Charlie     90
2   4      NaN     95
```

### **4. Outer Merge (Full Join)**
Includes all rows from both DataFrames, filling missing values with `NaN` where necessary.
```python
print(pd.merge(df1, df2, on='ID', how='outer'))
```
#### Output:
```
   ID     Name  Score
0   1    Alice    NaN
1   2      Bob   85.0
2   3  Charlie   90.0
3   4      NaN   95.0
```

### **5. Cross Merge (Cross Join)**
Creates all possible combinations of rows from both DataFrames.
```python
print(pd.merge(df1, df2, how='cross'))
```
#### Output:
```
   ID_x     Name  ID_y  Score
0     1    Alice     2     85
1     1    Alice     3     90
2     1    Alice     4     95
3     2      Bob     2     85
4     2      Bob     3     90
5     2      Bob     4     95
...
```
This method results in `m × n` rows, where `m` and `n` are the number of rows in the two DataFrames.



In [27]:
# Left-hand frame for the merge demos, written row by row.
df1 = pd.DataFrame(
    [[1, 4, 3], [2, 5, 4], [4, 6, 5], [5, 7, 6], [6, 8, 6]],
    columns=['key1', 'key2', 'key3'],
)
df1

Unnamed: 0,key1,key2,key3
0,1,4,3
1,2,5,4
2,4,6,5
3,5,7,6
4,6,8,6


In [29]:
# Right-hand frame for the merge demos, written row by row.
df2 = pd.DataFrame(
    [[1, 56, 3], [2, 5, 56], [45, 6, 5], [6, 7, 6], [67, 8, 6]],
    columns=['key1', 'key4', 'key5'],
)
df2

Unnamed: 0,key1,key4,key5
0,1,56,3
1,2,5,56
2,45,6,5
3,6,7,6
4,67,8,6


In [None]:
# Inner merge: `on` is omitted, so pandas joins on the column name both frames
# share (key1) and keeps only the key1 values present in both frames.
pd.merge(df1, df2, how='inner')

Unnamed: 0,key1,key2,key3,key4,key5
0,1,4,3,56,3
1,2,5,4,5,56
2,6,8,6,7,6


In [None]:
# Left merge: keeps every row of df1; key4/key5 are NaN where df2 has no matching key1.
pd.merge(df1, df2, how='left')

Unnamed: 0,key1,key2,key3,key4,key5
0,1,4,3,56.0,3.0
1,2,5,4,5.0,56.0
2,4,6,5,,
3,5,7,6,,
4,6,8,6,7.0,6.0


In [32]:
# Right merge: keeps every row of df2; key2/key3 are NaN where df1 has no matching key1.
pd.merge(df1, df2, how='right')

Unnamed: 0,key1,key2,key3,key4,key5
0,1,4.0,3.0,56,3
1,2,5.0,4.0,5,56
2,45,,,6,5
3,6,8.0,6.0,7,6
4,67,,,8,6


In [None]:
# Outer merge: keeps every key1 value found in either frame (the union);
# columns coming from the frame that lacks a key are filled with NaN.
pd.merge(df1, df2, how='outer')

Unnamed: 0,key1,key2,key3,key4,key5
0,1,4.0,3.0,56.0,3.0
1,2,5.0,4.0,5.0,56.0
2,4,6.0,5.0,,
3,5,7.0,6.0,,
4,6,8.0,6.0,7.0,6.0
5,45,,,6.0,5.0
6,67,,,8.0,6.0


In [34]:
# Cross merge (Cartesian product): every row of df1 is paired with every row of df2.
# No key is matched, so the clashing key1 columns are kept as key1_x / key1_y.
pd.merge(df1, df2, how='cross')

Unnamed: 0,key1_x,key2,key3,key1_y,key4,key5
0,1,4,3,1,56,3
1,1,4,3,2,5,56
2,1,4,3,45,6,5
3,1,4,3,6,7,6
4,1,4,3,67,8,6
5,2,5,4,1,56,3
6,2,5,4,2,5,56
7,2,5,4,45,6,5
8,2,5,4,6,7,6
9,2,5,4,67,8,6


## Merging DataFrames on Different Column Names

When merging two DataFrames, sometimes the common key columns have different names. In such cases, we use `left_on` and `right_on` parameters in `pd.merge()` to specify the columns to match from each DataFrame.

### Example:
```python
import pandas as pd

df1 = pd.DataFrame({
    'key2': [1, 2, 3],
    'Name': ['Alice', 'Bob', 'Charlie']
})

df2 = pd.DataFrame({
    'key4': [2, 3, 4],
    'Score': [85, 90, 95]
})

# Merging on different column names
merged_df = pd.merge(df1, df2, how='left', left_on='key2', right_on='key4')
print(merged_df)
```

### Output:
```
   key2     Name  key4  Score
0     1    Alice   NaN    NaN
1     2      Bob   2.0   85.0
2     3  Charlie   3.0   90.0
```

### Explanation:
- **`left_on='key2'`**: Uses the column `key2` from `df1`.
- **`right_on='key4'`**: Uses the column `key4` from `df2`.
- **`how='left'`**: Keeps all rows from `df1`, and only matching rows from `df2`. If no match is found, NaN is inserted.

This approach is useful when working with datasets where the column names differ but still represent the same data concept.



In [36]:
df1

Unnamed: 0,key1,key2,key3
0,1,4,3
1,2,5,4
2,4,6,5
3,5,7,6
4,6,8,6


In [37]:
df2

Unnamed: 0,key1,key4,key5
0,1,56,3
1,2,5,56
2,45,6,5
3,6,7,6
4,67,8,6


In [35]:
# Left merge matching df1.key2 against df2.key4. key1 exists in both frames but
# is not the join key here, so it comes back twice as key1_x / key1_y.
pd.merge(df1, df2, how='left', left_on="key2", right_on='key4')

Unnamed: 0,key1_x,key2,key3,key1_y,key4,key5
0,1,4,3,,,
1,2,5,4,2.0,5.0,56.0
2,4,6,5,45.0,6.0,5.0
3,5,7,6,6.0,7.0,6.0
4,6,8,6,67.0,8.0,6.0


### Understanding `join()` in Pandas

The `join()` method combines two DataFrames based on their index by default. It is useful when data shares a common index but different columns.

#### Key Features:
- Default: Left join on index.
- Supports `left`, `right`, `inner`, `outer` joins.
- Can join on columns using `on`.

#### Example 1: Joining on Index
```python
import pandas as pd

df1 = pd.DataFrame({'Name': ['Alice', 'Bob', 'Charlie']}, index=[1, 2, 3])
df2 = pd.DataFrame({'Score': [85, 90, 95]}, index=[2, 3, 4])

# Left join on index
df_joined = df1.join(df2)
print(df_joined)
```
**Output:**
```
      Name  Score
1    Alice    NaN
2      Bob   85.0
3  Charlie   90.0
```

#### Example 2: Outer Join on Index
```python
df_joined_outer = df1.join(df2, how='outer')
print(df_joined_outer)
```
**Output:**
```
      Name  Score
1    Alice    NaN
2      Bob   85.0
3  Charlie   90.0
4      NaN   95.0
```

#### Example 3: Joining on a Column
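If the key lives in a column rather than in the index, move it into the index with `set_index()` first (alternatively, `on=` names the key column of the calling DataFrame, which is then matched against the other DataFrame's index):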
```python
df1 = pd.DataFrame({'ID': [1, 2, 3], 'Name': ['Alice', 'Bob', 'Charlie']})
df2 = pd.DataFrame({'ID': [2, 3, 4], 'Score': [85, 90, 95]})

# Join on 'ID' column
df_joined = df1.set_index('ID').join(df2.set_index('ID'), how='inner')
print(df_joined)
```
**Output:**
```
       Name  Score
ID
2       Bob     85
3   Charlie     90
```

#### `join()` vs `merge()`
| Feature        | `join()` | `merge()` |
|---------------|---------|----------|
| Default Join  | Index-based | Column-based |
| Different Column Names | No | Yes (`left_on`, `right_on`) |
| Flexibility | Less flexible | More control |

Use `join()` for index-based joins and `merge()` for column-based joins.



In [40]:
# Left-hand frame for the join demos, labelled 'a'..'e'.
df1 = pd.DataFrame(
    {'key1': [1, 2, 4, 5, 6], 'key2': [4, 5, 6, 7, 8], 'key3': [3, 4, 5, 6, 6]},
    index=list('abcde'),
)
df1

Unnamed: 0,key1,key2,key3
a,1,4,3
b,2,5,4
c,4,6,5
d,5,7,6
e,6,8,6


In [42]:
# Right-hand frame for the join demos, labelled 'a', 'b', 'h', 'i', 'j'.
df2 = pd.DataFrame(
    {'key6': [1, 2, 45, 6, 67], 'key4': [56, 5, 6, 7, 8], 'key5': [3, 56, 5, 6, 6]},
    index=list('abhij'),
)
df2

Unnamed: 0,key6,key4,key5
a,1,56,3
b,2,5,56
h,45,6,5
i,6,7,6
j,67,8,6


In [None]:
# Inner join: rows are matched on index labels, and only labels present in both frames are kept.
df1.join(df2, how='inner')

Unnamed: 0,key1,key2,key3,key6,key4,key5
a,1,4,3,1,56,3
b,2,5,4,2,5,56


In [44]:
# Left join: keeps every index label of df1; df2's columns are NaN where it lacks that label.
df1.join(df2, how='left')

Unnamed: 0,key1,key2,key3,key6,key4,key5
a,1,4,3,1.0,56.0,3.0
b,2,5,4,2.0,5.0,56.0
c,4,6,5,,,
d,5,7,6,,,
e,6,8,6,,,


In [45]:
# Right join: keeps every index label of df2; df1's columns are NaN where it lacks that label.
df1.join(df2, how='right')

Unnamed: 0,key1,key2,key3,key6,key4,key5
a,1.0,4.0,3.0,1,56,3
b,2.0,5.0,4.0,2,5,56
h,,,,45,6,5
i,,,,6,7,6
j,,,,67,8,6


In [None]:
# Outer join: keeps the union of both frames' index labels, filling the gaps with NaN.
df1.join(df2, how='outer')

Unnamed: 0,key1,key2,key3,key6,key4,key5
a,1.0,4.0,3.0,1.0,56.0,3.0
b,2.0,5.0,4.0,2.0,5.0,56.0
c,4.0,6.0,5.0,,,
d,5.0,7.0,6.0,,,
e,6.0,8.0,6.0,,,
h,,,,45.0,6.0,5.0
i,,,,6.0,7.0,6.0
j,,,,67.0,8.0,6.0


In [48]:
# Cross join: the same Cartesian product as the earlier cross merge.
# Index labels play no part, so the result gets a fresh 0-based index.
df1.join(df2, how='cross')

Unnamed: 0,key1,key2,key3,key6,key4,key5
0,1,4,3,1,56,3
1,1,4,3,2,5,56
2,1,4,3,45,6,5
3,1,4,3,6,7,6
4,1,4,3,67,8,6
5,2,5,4,1,56,3
6,2,5,4,2,5,56
7,2,5,4,45,6,5
8,2,5,4,6,7,6
9,2,5,4,67,8,6


In [53]:
df

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.2500,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.9250,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1000,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.0500,,S
...,...,...,...,...,...,...,...,...,...,...,...,...
886,887,0,2,"Montvila, Rev. Juozas",male,27.0,0,0,211536,13.0000,,S
887,888,1,1,"Graham, Miss. Margaret Edith",female,19.0,0,0,112053,30.0000,B42,S
888,889,0,3,"Johnston, Miss. Catherine Helen ""Carrie""",female,,1,2,W./C. 6607,23.4500,,S
889,890,1,1,"Behr, Mr. Karl Howell",male,26.0,0,0,111369,30.0000,C148,C


### Transforming the `Fare` column's values with `apply`

In [56]:
# Series.apply calls the given function once for every value in the column
# (element-wise) and returns a new Series holding the results.
# Here each fare is multiplied by 90 and stored in a new Fare_INR column.
# NOTE(review): 90 looks like a currency-to-INR exchange rate - confirm.
df["Fare_INR"] = df['Fare'].apply(lambda x:x * 90 )

In [57]:
df

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked,Fare_INR
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.2500,,S,652.500
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C,6415.497
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.9250,,S,713.250
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1000,C123,S,4779.000
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.0500,,S,724.500
...,...,...,...,...,...,...,...,...,...,...,...,...,...
886,887,0,2,"Montvila, Rev. Juozas",male,27.0,0,0,211536,13.0000,,S,1170.000
887,888,1,1,"Graham, Miss. Margaret Edith",female,19.0,0,0,112053,30.0000,B42,S,2700.000
888,889,0,3,"Johnston, Miss. Catherine Helen ""Carrie""",female,,1,2,W./C. 6607,23.4500,,S,2110.500
889,890,1,1,"Behr, Mr. Karl Howell",male,26.0,0,0,111369,30.0000,C148,C,2700.000


In [63]:
# Store the character count of each passenger's name in a new len_name column.
# The lambda only forwards to len, so df["Name"].apply(len) gives the same result.
df["len_name"] = df['Name'].apply(lambda x: len(x))
df

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked,Fare_INR,len_name
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.2500,,S,652.500,23
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C,6415.497,51
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.9250,,S,713.250,22
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1000,C123,S,4779.000,44
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.0500,,S,724.500,24
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
886,887,0,2,"Montvila, Rev. Juozas",male,27.0,0,0,211536,13.0000,,S,1170.000,21
887,888,1,1,"Graham, Miss. Margaret Edith",female,19.0,0,0,112053,30.0000,B42,S,2700.000,28
888,889,0,3,"Johnston, Miss. Catherine Helen ""Carrie""",female,,1,2,W./C. 6607,23.4500,,S,2110.500,40
889,890,1,1,"Behr, Mr. Karl Howell",male,26.0,0,0,111369,30.0000,C148,C,2700.000,21


In [64]:
# Can we apply a user-defined function? Yes - pass the function object itself to apply().

def convert(x, rate=90):
    """Multiply ``x`` by ``rate`` and return the product.

    Parameters
    ----------
    x : number (or anything supporting ``*``)
        The value to scale, e.g. one fare passed in by ``Series.apply``.
    rate : number, default 90
        The multiplier. It defaults to the factor that used to be
        hard-coded, so existing ``apply(convert)`` calls behave as before.
        NOTE(review): presumably a currency-to-INR rate - confirm.

    Returns
    -------
    The value ``x * rate``.
    """
    return x * rate

df["Fare"].apply(convert)

0       652.500
1      6415.497
2       713.250
3      4779.000
4       724.500
         ...   
886    1170.000
887    2700.000
888    2110.500
889    2700.000
890     697.500
Name: Fare, Length: 891, dtype: float64

In [65]:
def level_fare(x):
    """Classify a fare into a price band.

    Parameters
    ----------
    x : float
        The fare to classify.

    Returns
    -------
    str or None
        ``"Cheap Fare"`` for ``x < 15``, ``"Medium Fare"`` for
        ``15 <= x < 32`` and ``"High Fare"`` for ``x >= 32``.
        ``None`` when ``x`` is NaN, since NaN compares false against
        every threshold.
    """
    if x < 15:
        return "Cheap Fare"
    if x < 32:
        return "Medium Fare"
    # `>=` rather than `>`: a fare of exactly 32 used to match no branch
    # and silently came back as None.
    if x >= 32:
        return "High Fare"
    # Only reached for NaN (a missing fare); leave it unlabelled.
    return None

df["Level_Fare"] = df["Fare"].apply(level_fare)
df

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked,Fare_INR,len_name,Level_Fare
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.2500,,S,652.500,23,Cheap Fare
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C,6415.497,51,High Fare
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.9250,,S,713.250,22,Cheap Fare
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1000,C123,S,4779.000,44,High Fare
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.0500,,S,724.500,24,Cheap Fare
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
886,887,0,2,"Montvila, Rev. Juozas",male,27.0,0,0,211536,13.0000,,S,1170.000,21,Cheap Fare
887,888,1,1,"Graham, Miss. Margaret Edith",female,19.0,0,0,112053,30.0000,B42,S,2700.000,28,Medium Fare
888,889,0,3,"Johnston, Miss. Catherine Helen ""Carrie""",female,,1,2,W./C. 6607,23.4500,,S,2110.500,40,Medium Fare
889,890,1,1,"Behr, Mr. Karl Howell",male,26.0,0,0,111369,30.0000,C148,C,2700.000,21,Medium Fare


In [70]:
# Small demo frame: two integer columns and one string column.
data = dict(
    a=[1, 2, 3, 4],
    b=[5, 5, 6, 7],
    c=["Pwskills", "Mritunjay", "thakur", "Paridhi"],
)

df1 = pd.DataFrame(data)
df1

Unnamed: 0,a,b,c
0,1,5,Pwskills
1,2,5,Mritunjay
2,3,6,thakur
3,4,7,Paridhi


In [71]:
# Promote column "c" to the row index; the column is removed from the data
# (drop=True). The result is reassigned rather than mutated with inplace=True.
df1 = df1.set_index(keys="c", drop=True)
df1

Unnamed: 0_level_0,a,b
c,Unnamed: 1_level_1,Unnamed: 2_level_1
Pwskills,1,5
Mritunjay,2,5
thakur,3,6
Paridhi,4,7


In [72]:
# Restore a default 0-based index; drop=False moves the old index ("c") back
# into the frame as a regular column. The result is reassigned rather than
# mutated with inplace=True.
df1 = df1.reset_index(drop=False)

In [73]:
df1

Unnamed: 0,c,a,b
0,Pwskills,1,5
1,Mritunjay,2,5
2,thakur,3,6
3,Paridhi,4,7


In [74]:
# reindex returns a new frame whose rows follow the given label order
# (here 1, 2, 3, 0); it does not renumber the rows, and df1 itself is unchanged.
df1.reindex([1, 2, 3, 0])

Unnamed: 0,c,a,b
1,Mritunjay,2,5
2,thakur,3,6
3,Paridhi,4,7
0,Pwskills,1,5


### Iterating row-wise over a DataFrame

In [77]:
# iterrows() yields one (index label, row-as-Series) tuple per row.
for row_pair in df1.iterrows():
    print(f"{row_pair} ----")

(0, c    Pwskills
a           1
b           5
Name: 0, dtype: object) ----
(1, c    Mritunjay
a            2
b            5
Name: 1, dtype: object) ----
(2, c    thakur
a         3
b         6
Name: 2, dtype: object) ----
(3, c    Paridhi
a          4
b          7
Name: 3, dtype: object) ----


### Iterating column-wise over a DataFrame

In [80]:
# items() yields one (column name, column-as-Series) tuple per column.
for column_pair in df1.items():
    print(column_pair)
    print("______________________")

('c', 0     Pwskills
1    Mritunjay
2       thakur
3      Paridhi
Name: c, dtype: object)
______________________
('a', 0    1
1    2
2    3
3    4
Name: a, dtype: int64)
______________________
('b', 0    5
1    5
2    6
3    7
Name: b, dtype: int64)
______________________


### How to apply a function to a whole DataFrame

In [None]:
df1

Unnamed: 0,c,a,b
0,Pwskills,1,5
1,Mritunjay,2,5
2,thakur,3,6
3,Paridhi,4,7


In [83]:
# Remove the string column so that only the numeric columns a and b remain.
# The result is reassigned rather than mutated with inplace=True.
df1 = df1.drop(columns=["c"])

In [84]:
df1

Unnamed: 0,a,b
0,1,5
1,2,5
2,3,6
3,4,7


In [85]:
def func_sum(x):
    """Return the total of ``x``, as computed by its own ``sum()`` method."""
    total = x.sum()
    return total

In [87]:
# axis=1: func_sum receives one row at a time, giving one total per row (a + b).
df1.apply(func_sum, axis=1)

0     6
1     7
2     9
3    11
dtype: int64

In [88]:
# axis=0: func_sum receives one column at a time, giving one total per column.
df1.apply(func_sum, axis=0)

a    10
b    23
dtype: int64

In [89]:
# Default axis=0: the lambda receives each column as a Series and squares it,
# so the result has the same shape as df1.
df1.apply(lambda x: x**2)

Unnamed: 0,a,b
0,1,25
1,4,25
2,9,36
3,16,49


In [91]:
# Demo frame for sorting: column "a" is deliberately out of order.
data = dict(
    a=[100, 22, 13, 42],
    b=[5, 5, 6, 7],
    c=["Pwskills", "Mritunjay", "thakur", "Paridhi"],
)

df2 = pd.DataFrame(data)
df2

Unnamed: 0,a,b,c
0,100,5,Pwskills
1,22,5,Mritunjay
2,13,6,thakur
3,42,7,Paridhi


In [92]:
# Sort the rows by column "a" (ascending by default). A new frame is returned,
# and each row keeps its original index label.
df2.sort_values(by='a')

Unnamed: 0,a,b,c
2,13,6,thakur
1,22,5,Mritunjay
3,42,7,Paridhi
0,100,5,Pwskills
