**Introduction**

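This notebook explores core functionality of the pandas library: creating DataFrames and Series, reading a CSV file, inspecting the data, and selecting subsets of rows and columns.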

In [None]:
# Build a small DataFrame holding the data of three Titanic passengers.

import pandas as pd

# One tuple per passenger; the column labels are supplied separately.
df = pd.DataFrame(
    [
        ('Braund, Mr. Owen Harris', 22, 'Male'),
        ('Allen, Mr. William Henry', 35, 'Male'),
        ('Bonnell, Miss. Elizabeth', 58, 'Female'),
    ],
    columns=['Name', 'Age', 'Sex'],
)

# Each column of a DataFrame is a Series.
print(df.head())

# Extract a single column from the DataFrame.
# The result is a Series: it keeps the row labels but, being a single column,
# it has no column labels.
df['Age']


In [None]:
# Create a Series from scratch.
ages = pd.Series([21, 22, 23], name='Age')

# Perform operations on a DataFrame column and on a Series.
# The built-in round() is used instead of calling the __round__ dunder
# method directly; the result is the same.
print(round(df['Age'].mean(), 2))

print(ages.min())

# Get the basic statistics of the numerical data.
df.describe()


In [None]:
# Read the Titanic data and store it as a pandas DataFrame.
# NOTE(review): this is an absolute, environment-specific path, so the notebook
# only runs where the file exists at exactly this location.
titanic = pd.read_csv('/workspaces/Pandas/Datasets/titanic.csv')

# Jupyter renders only the last expression of a cell, so the intermediate
# results below are passed to display() explicitly.

# Display the first 8 rows.
display(titanic.head(8))

# Display the last 10 rows.
display(titanic.tail(10))

# Check which data type pandas inferred for each column.
# dtypes is an attribute, not a method, so it takes no parentheses.
titanic.dtypes


In [None]:
#Exporting the titanic dataset to an Excel file, sheet 'Passenger' (currently disabled)
#titanic.to_excel('titanic.xlsx', sheet_name= 'Passenger', engine = 'xlsxwriter')

#Getting a technical summary of the dataframe

titanic.info()


In [None]:
# Select a subset of the DataFrame.
# Jupyter renders only the last expression of a cell, so intermediate results
# in this cell are printed or displayed explicitly.

# Double brackets pass a list of column names, so the result is a DataFrame
# (two-dimensional) even when only one column is selected. Single brackets,
# titanic['Age'], would return a one-dimensional Series instead.
ages = titanic[['Age']]

# Check the type of the output.
print(type(ages))
# For a DataFrame, shape is a (rows, columns) tuple; here there is one column.
print(ages.shape)

# Select multiple columns.
# Outer brackets select data from the DataFrame; the inner brackets are a
# Python list of column names.
age_sex = titanic[['Age', 'Sex']]
display(age_sex.head())
print(type(age_sex))
print(age_sex.shape)

# Filter specific rows from the DataFrame.
# The condition produces a boolean Series with one True/False value per row.
is_above_35 = titanic['Age'] > 35
display(is_above_35.head())

# Putting the boolean Series inside the selection brackets keeps only the
# rows for which the condition is True.
above_35 = titanic[is_above_35]

above_35.shape



In [123]:
# Passengers from cabin classes 2 and 3.
# isin() yields one boolean per row: True where Pclass is one of the listed values.
in_class_2_or_3 = titanic['Pclass'].isin([2, 3])
class_23 = titanic.loc[in_class_2_or_3]

# Passengers whose age is known: notna() is True wherever Age is not missing.
has_age = titanic['Age'].notna()
age_known = titanic.loc[has_age]
display(age_known)

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.2500,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.9250,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1000,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.0500,,S
...,...,...,...,...,...,...,...,...,...,...,...,...
885,886,0,3,"Rice, Mrs. William (Margaret Norton)",female,39.0,0,5,382652,29.1250,,Q
886,887,0,2,"Montvila, Rev. Juozas",male,27.0,0,0,211536,13.0000,,S
887,888,1,1,"Graham, Miss. Margaret Edith",female,19.0,0,0,112053,30.0000,B42,S
889,890,1,1,"Behr, Mr. Karl Howell",male,26.0,0,0,111369,30.0000,C148,C


In [None]:
# Select specific rows and columns from a DataFrame.
# Jupyter renders only the last expression of a cell, so intermediate results
# in this cell are passed to display() explicitly.

# Names of the passengers older than 35.
# With .loc, the part before the comma selects the rows and the part after
# the comma selects the columns.
adult_names = titanic.loc[titanic['Age'] > 35, 'Name']
display(adult_names.head())

# Select rows 10 through 25 and columns 3 through 5 by position.
# .iloc positions start at 0 and every slice is [inclusive, exclusive).
display(titanic.iloc[9:25, 2:5])

# Overwrite the Name of the first 3 rows with "anonymous".
# The column position is looked up by label instead of hard-coding 3.
# NOTE: this modifies `titanic` in place; re-run the cell that reads the CSV
# to get the original names back.
name_position = titanic.columns.get_loc('Name')
titanic.iloc[0:3, name_position] = "anonymous"

titanic.head()



Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,anonymous,male,22.0,1,0,A/5 21171,7.25,,S
1,2,1,1,anonymous,female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,anonymous,female,26.0,0,0,STON/O2. 3101282,7.925,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,,S
