# Pandas Day 6

## Splitting a string into different columns

In [12]:
import pandas as pd
import numpy as np
import seaborn as sns

In [13]:
# Build the example frame row by row: each tuple is one (Name, Location) record.
# Both columns hold space-separated strings that later cells split apart.
df = pd.DataFrame(
    [
        ("Asad Ali", "NewYork USA"),
        ("Adnan Ali", "LosAngeles USA"),
        ("Husnain Ali", "Chicago USA"),
        ("Zain Ali", "Houston USA"),
    ],
    columns=["Name", "Location"],
)
# Heading and frame are emitted on separate lines, exactly like two print calls.
print("Original DataFrame:", df, sep="\n")

Original DataFrame:
          Name        Location
0     Asad Ali     NewYork USA
1    Adnan Ali  LosAngeles USA
2  Husnain Ali     Chicago USA
3     Zain Ali     Houston USA


In [14]:
# Split the 'Name' column into 'First Name' and 'Last Name'.
# n=1 stops after the first space, so a name containing more than one space
# still expands to two columns (first token / remainder). Without the limit,
# such a name would expand to three or more columns and the two-column
# assignment below would raise. This also matches how 'Location' is split.
df[["First Name", "Last Name"]] = df["Name"].str.split(" ", n=1, expand=True)
print("\nDataFrame after splitting 'Name' into 'First Name' and 'Last Name':")
print(df[["First Name", "Last Name"]])
# Drop the original 'Name' column now that its parts have their own columns.
# NOTE: this rebinds `df`, so re-running this cell without re-creating the
# frame fails because 'Name' no longer exists.
df = df.drop(columns=["Name"])
print("\nDataFrame after dropping the 'Name' column:")
print(df)


DataFrame after splitting 'Name' into 'First Name' and 'Last Name':
  First Name Last Name
0       Asad       Ali
1      Adnan       Ali
2    Husnain       Ali
3       Zain       Ali

DataFrame after dropping the 'Name' column:
         Location First Name Last Name
0     NewYork USA       Asad       Ali
1  LosAngeles USA      Adnan       Ali
2     Chicago USA    Husnain       Ali
3     Houston USA       Zain       Ali


In [15]:
# Split 'Location' into 'City' and 'Country'.
# The split is limited to the first space (n=1), so the result has at most two parts.
df[["City", "Country"]] = df["Location"].str.split(pat=" ", n=1, expand=True)
print(
    "\nDataFrame after splitting 'Location' into 'City' and 'Country':",
    df[["City", "Country"]],
    sep="\n",
)
# The combined 'Location' column is no longer needed; remove it and show the result.
df = df.drop(labels=["Location"], axis="columns")
print("\nDataFrame after dropping the 'Location' column:", df, sep="\n")


DataFrame after splitting 'Location' into 'City' and 'Country':
         City Country
0     NewYork     USA
1  LosAngeles     USA
2     Chicago     USA
3     Houston     USA

DataFrame after dropping the 'Location' column:
  First Name Last Name        City Country
0       Asad       Ali     NewYork     USA
1      Adnan       Ali  LosAngeles     USA
2    Husnain       Ali     Chicago     USA
3       Zain       Ali     Houston     USA


## Aggregation by Multiple Groups/Functions

In [16]:
# Load seaborn's "titanic" example dataset; `df` now refers to this frame
# instead of the small name/location frame used in the previous section.
df = sns.load_dataset("titanic")
# Preview the first five rows under a heading.
print("\nTitanic Dataset:", df.head(n=5), sep="\n")


Titanic Dataset:
   survived  pclass     sex   age  sibsp  parch     fare embarked  class  \
0         0       3    male  22.0      1      0   7.2500        S  Third   
1         1       1  female  38.0      1      0  71.2833        C  First   
2         1       3  female  26.0      0      0   7.9250        S  Third   
3         1       1  female  35.0      1      0  53.1000        S  First   
4         0       3    male  35.0      0      0   8.0500        S  Third   

     who  adult_male deck  embark_town alive  alone  
0    man        True  NaN  Southampton    no  False  
1  woman       False    C    Cherbourg   yes  False  
2  woman       False  NaN  Southampton   yes   True  
3  woman       False    C  Southampton   yes  False  
4    man        True  NaN  Southampton    no   True  


In [19]:
# Group the rows by 'who' and count the entries in every other column.
# count() tallies non-null values only, so a column with missing data can
# show a smaller number than the group actually contains.
print(
    "\nCount of occurrences grouped by 'who':",
    df.groupby(by="who").count(),
    sep="\n",
)


Count of occurrences grouped by 'who':
       survived  pclass  sex  age  sibsp  parch  fare  embarked  class  \
who                                                                      
child        83      83   83   83     83     83    83        83     83   
man         537     537  537  413    537    537   537       537    537   
woman       271     271  271  218    271    271   271       269    271   

       adult_male  deck  embark_town  alive  alone  
who                                                 
child          83    13           83     83     83  
man           537    99          537    537    537  
woman         271    91          269    271    271  


In [23]:
# Group by the combination of 'sex', 'adult_male' and 'embark_town'; the three
# keys become the levels of the result's index.
# As with any count(), only non-null values per column are tallied.
print(
    "\nCount of occurrences grouped by 'sex',  'adult_male', and 'embark_town':",
    df.groupby(by=["sex", "adult_male", "embark_town"]).count(),
    sep="\n",
)


Count of occurrences grouped by 'sex',  'adult_male', and 'embark_town':
                               survived  pclass  age  sibsp  parch  fare  \
sex    adult_male embark_town                                              
female False      Cherbourg          73      73   61     73     73    73   
                  Queenstown         36      36   12     36     36    36   
                  Southampton       203     203  186    203    203   203   
male   False      Cherbourg           5       5    5      5      5     5   
                  Queenstown          4       4    4      4      4     4   
                  Southampton        31      31   31     31     31    31   
       True       Cherbourg          90      90   64     90     90    90   
                  Queenstown         37      37   12     37     37    37   
                  Southampton       410     410  337    410    410   410   

                               embarked  class  who  deck  alive  alone  
sex    adult_ma