# Introduction to Pandas

In [None]:
import pandas as pd

### Creating a DataFrame from Python objects

In [None]:
# Build a single-column DataFrame from a flat Python list.
# The column holds letters, so it is labelled "letters" (it was previously
# mislabelled "numbers", which contradicted its content).
df = pd.DataFrame(["a", "b", "c"], columns=["letters"])

In [None]:
# Build a DataFrame from a list of rows: each inner list is one row and
# `columns` supplies the label for each position in the row.
rows = [
    [1, "hello", True],
    [2, "hey", False],
    [3, "Paris", False],
    [3, "test", True],
]
column_names = ["numbers", "word", "some_bool"]
df = pd.DataFrame(data=rows, columns=column_names)

In [None]:
# Sample records: one dict per book, all sharing the same three keys.
authors = ["a", "b", "c", "d"]
titles = ["A", "B", "C", "D"]
years = [1990, 1991, 1992, 1993]
books = [
    {"Author": author, "Title": title, "Year": year}
    for author, title, year in zip(authors, titles, years)
]

In [None]:
# A list of dicts becomes one row per dict; the dict keys become the columns.
df = pd.DataFrame(data=books)
df

In [None]:
# Convert the DataFrame back into a list of per-row dicts.
df.to_dict("records")

In [None]:
# Number of columns: second entry of the (rows, columns) shape tuple.
df.shape[1]

In [None]:
# Select a subset of columns, in the order given (all rows kept).
df.loc[:, ["Year", "Author"]]

In [None]:
# Indexing with a *list* of column names (double brackets) yields a DataFrame,
# even when the list contains a single column.
type(df[["Year"]])

In [None]:
# Indexing with a single column name (single brackets) yields a Series.
type(df["Year"])

In [None]:
# The column labels of the DataFrame, as a pandas Index.
df.columns

### Reading a DataFrame from a file / exporting to a file

In [None]:
import seaborn as sns

In [None]:
# Load seaborn's "titanic" example dataset as a DataFrame and preview
# the first five rows.
df = sns.load_dataset(name="titanic")
df.head(n=5)

In [None]:
# Export to CSV (comma-separated by default). `index_label` names the column
# that the DataFrame index is written to.
df.to_csv(
    path_or_buf="titanic.csv",
    index_label="passenger_id",
)

In [None]:
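# Peek at the exported CSV from the shell; uncomment the line for your OS.
# Linux / macOS (prints the first lines of the file):
# ! head titanic.csv
# Windows (prints the whole file):
# ! type titanic.csv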

In [None]:
# Count the lines in the exported CSV (Linux / macOS):
# ! wc -l titanic.csv

In [None]:
# Read back only a subset of columns from the CSV exported above.
# - The file was written with pandas' default comma separator, so no `sep`
#   override is needed (`sep=";"` would not split the columns).
# - `usecols` must name columns that actually exist in the file; the titanic
#   column names are lowercase and there is no name column, so the previous
#   ["Survived", "Name"] selection raised a ValueError.
df = pd.read_csv("titanic.csv", encoding="utf8", usecols=["survived", "age"])

In [None]:
# Load an Excel workbook into a DataFrame ("myfile.xlsx" is a placeholder path).
df = pd.read_excel(io="myfile.xlsx")

In [None]:
# Load a JSON file into a DataFrame ("myfile.json" is a placeholder path).
pd.read_json(path_or_buf="myfile.json")

### Reading from and writing to SQL with Pandas

In [None]:
# If you are using PIP:
#
# pip install "sqlalchemy<2.0.0"
# pip install mysqlclient
#
# OR if you are using Anaconda:
#
# conda install "sqlalchemy<2.0.0"
# conda install mysqlclient

In [None]:
# Connection URI for the local MySQL database, reused by the cells below.
MYSQL_CONNECTION_STRING = "mysql://root@localhost/ironhack"

# Run a SELECT against the database and load the result set into a DataFrame.
df = pd.read_sql(
    sql="SELECT * FROM `co2_emissions`",
    con=MYSQL_CONNECTION_STRING,
)

In [None]:
# Write the DataFrame to a SQL table.
# if_exists="replace" drops and recreates the table if it already exists;
# index=False keeps the DataFrame index out of the table.
df.to_sql(
    name="co2_emissions_bis",
    con=MYSQL_CONNECTION_STRING,
    index=False,
    if_exists="replace",
)

In [None]:
# Read the freshly written table back to check the round trip.
pd.read_sql(
    sql="SELECT * FROM `co2_emissions_bis`",
    con=MYSQL_CONNECTION_STRING,
)

### Applying Mathematical Functions to DataFrames

In [None]:
# Reload the titanic CSV exported earlier in this notebook.
# The export used pandas' default comma separator, so the file must be read
# with the default separator too: with sep=";" every line is parsed as a
# single column and lookups such as df["age"] below fail with a KeyError.
# Note: the exported index comes back as a regular `passenger_id` column;
# pass index_col="passenger_id" if you want it restored as the index.
df = pd.read_csv("titanic.csv")

In [None]:
# Mean of a single column (a Series); missing values are skipped by default.
ages = df["age"]
ages.mean()

In [None]:
# Mean of several columns at once: returns one value per selected column.
numeric_columns = ["age", "fare"]
df[numeric_columns].mean()

In [None]:
# Preview the first five rows.
df.head(n=5)

In [None]:
# `.iloc` selects by integer *position* (row, column); slices exclude the end.
# Single cell: first row, fourth column.
print(df.iloc[0, 3])
# Block: first three rows, third and fourth columns.
print(df.iloc[:3, 2:4])
# First five rows, columns picked by an explicit list of positions.
display(df.iloc[:5, [2, 3, 8]])

In [None]:
# `.loc` selects by *label*; unlike `.iloc`, label slices include both ends.
# Rows picked by an explicit list of index labels, one column.
first_four = [0, 1, 2, 3]
display(df.loc[first_four, "sex"])
# Row label slice 0..5 (inclusive), one column.
display(df.loc[0:5, "sex"])
# All rows, columns picked by an explicit list of names.
display(df.loc[:, ["class", "alive"]])
# All rows, every column from "class" through "alive" (inclusive).
display(df.loc[:, "class":"alive"])

In [None]:
# Boolean-mask filtering: keep only first-class passengers, then average
# their ages.
is_first_class = df["class"] == "First"
display(df.loc[is_first_class, "age"].mean())

In [None]:
# Add a derived column: element-wise sum of the `sibsp` and `parch` columns.
# (The previous `==` stored a boolean "are they equal" flag, which contradicts
# the column name `sibsp_plus_parch`.)
df['sibsp_plus_parch'] = df['sibsp'] + df['parch']

In [None]:
# Display the DataFrame, including the newly added column.
df

In [None]:
# Most frequent embarkation town: count occurrences of each value, then take
# the label with the highest count.
town_counts = df["embark_town"].value_counts(ascending=True)
town_counts.idxmax()