## What are DataFrames?

* How DataFrames are created, explained using dictionaries

In [None]:
import pandas as pd

In [None]:
# Plain Python dict used as the raw data:
# every key becomes a column name, every list holds that column's values
dogs = dict(
    name=['Goldy', 'Snow', 'Jack', 'Tomm', 'Hero', 'Zed', 'Ozzy'],
    breed=['Retriver', 'Huskey', 'Shepard', 'Symoid', 'Labrador', 'Dalmation', 'Shepard'],
    color=['Golden', 'White/Black', 'Brown', 'White', 'Black', 'White/Black', 'Brown'],
    age=[6, 7, 12, 2, 6, 10, 3],
    owner_email=[
        'goldy1@email.com',
        'snow1@email.com',
        'jack1@email.com',
        'tomm1@email.com',
        'hero1@email.com',
        'zed1@email.com',
        'ozzy1@gmail.com',
    ],
)

In [None]:
# Show the raw dict: key -> list pairs, not yet a table
dogs

In [None]:
# Plain dict/list access: the entry at list index 3 under the 'owner_email' key
dogs['owner_email'][3]

#### Converting a dict to a pandas DataFrame

In [None]:
# Convert the dict to a DataFrame:
# keys become column labels, list items become the rows
df = pd.DataFrame(dogs)
df

* A pandas Series is basically a single table column
* A DataFrame is a collection of multiple Series that share the same index

In [None]:
# Types: the whole table is a DataFrame ...
print(type(df))

# ... while a single column selected from it is a Series
print(type(df['name']))

In [None]:
# Get a summary of the DataFrame:
# index range, column names, non-null counts, dtypes and memory usage
df.info()

#### Accessing specific values in a DataFrame

In [None]:
# Accessing the values of a single column

# Both forms return the same Series, but bracket access is recommended:
# attribute access (df.breed) breaks when a column name collides with an
# existing DataFrame attribute/method or is not a valid Python identifier.

df['breed']

# df.breed


In [None]:
# Accessing multiple columns

# The outer brackets are the indexing operator,
# the inner brackets are a list of column names.
# The result is a DataFrame (not a Series).

df[['breed','color']]


In [None]:
# Get the column labels (returned as a pandas Index)
df.columns

In [None]:
# Accessing rows by position

# iloc = integer location (0-based position, independent of the index labels)

# syntax
# {df_name}.iloc[position]
# A single position returns that row as a Series
df.iloc[0]

In [None]:
# Multiple rows using iloc: pass a list of positions, the result is a DataFrame
df.iloc[[2,4]]

In [None]:
# Get multiple rows and columns by position

# {dataframe_name}.iloc[[row_positions],[column_positions]]
# The output follows the order given in the lists
# (rows 0, 2, 1 and columns at positions 2, 0, 4)
df.iloc[[0,2,1],[2,0,4]]

In [None]:
# Accessing rows with loc
# loc = label location: looks rows up by index label, not by position
# (with the default integer index, label 1 is also the second row)

df.loc[1]

In [None]:

df.loc[[3,2,1,0],['name','breed']]

# These two are the same here, because the index is still the default
# 0..n-1 range and 'name'/'breed' are the columns at positions 0 and 1

# df.iloc[[3,2,1,0],[0,1]]

## Index 
* Indexing in pandas

In [None]:
# Current frame: the index is still the default integer range
df

In [None]:
# Temporary index: set_index returns a NEW DataFrame indexed by owner_email;
# df itself is not modified because the result is not assigned
df.set_index('owner_email')

In [None]:
# df is unchanged: set_index without inplace=True only returned a copy
df

In [None]:
# Permanent: inplace=True modifies df itself (and returns None),
# so owner_email is now the index instead of a regular column
df.set_index('owner_email', inplace=True)
df

In [None]:
# The index labels are now the owner e-mail addresses
df.index

In [None]:
# With owner_email as the index, loc looks rows up by e-mail label
df.loc[['jack1@email.com','hero1@email.com'],['breed','color']]

In [None]:
# Reset index: restores the default integer index in place.
# owner_email becomes a regular column again, inserted as the FIRST column.
df.reset_index(inplace=True)
df

### Using conditionals to filter rows and columns

In [None]:
# DataFrame used in the filtering examples below
df

In [None]:
# Conditional filtering, step 1: build a filter mask.
# Series.eq compares element-wise, giving one True/False per row
get_breed = df['breed'].eq('Shepard')
# Show the boolean mask itself
get_breed

In [None]:
# Any comparison on a column yields a boolean Series (True where age is above 6)
df['age'] > 6

In [None]:
# Get the rows selected by the filter mask:
# indexing with a boolean Series keeps only the rows where it is True

df[get_breed]

# This also works (mask written inline)

# df[df['color'] == 'Brown']

In [None]:
# Better way: loc takes the row mask AND the columns to return in one call

df.loc[get_breed, ['owner_email','name','breed']]

In [None]:
# Element-wise boolean operators for combining filter masks:
#   &  == AND
#   |  == OR
#   ~  == NOT
# Wrap every comparison in parentheses: & and | bind tighter than == or >

In [None]:
# Combined mask: True only for rows that are a Shepard AND named Jack
get_req = ((df['breed'] == 'Shepard') & (df['name'] == 'Jack'))

# df[get_req]
# ~ inverts the mask: every row EXCEPT the ones matching get_req
df.loc[~get_req]

In [None]:
# Single-condition mask: True for every Shepard
get_req = (df['breed'] == 'Shepard')

# df[get_req]

# Inverted mask: all rows whose breed is NOT 'Shepard'
df.loc[~get_req]

#### Updating specific rows and columns


In [None]:
# Rebuild df from the original dict so this section starts from clean data
df = pd.DataFrame(dogs)

* Updating column values


In [None]:
# Changing column names

# NOTE: this bare expression is not displayed, only the last line of a cell is
df.columns
# Replace ALL labels at once; the list must have one entry per existing column
df.columns = ['dog_name', 'breed', 'fur_color', 'age', 'owner_email']
df

In [None]:
# Normalising the column names in bulk

# Upper-case variant:
# df.columns = df.columns.str.upper()
# Lower-case every label with the vectorised string accessor
df.columns = df.columns.str.lower()

# Swapping spaces for underscores works the same way:
# df.columns = df.columns.str.replace(' ','_')

df.columns


In [None]:
# Changing only specific column names

# Pass a dict of {old_name: new_name}; unlisted columns are left alone.
# Keys that are not present are silently ignored, so this is a no-op when
# the columns were already renamed to dog_name / fur_color by an earlier cell.
df.rename(columns={'name':'dog_name','color':'fur_color'}, inplace=True)

df

In [None]:
# Frame with the renamed columns
df

#### Updating Row values

* Single row changes

In [None]:
# Frame before the row updates below
df

In [None]:
# Select the required value with loc[row_label, column] and assign to it

# for 1 item

df.loc[2,'age'] = 9


# for multiple items
# loc slices are label based and INCLUDE the end label, so 0:2 covers
# rows 0, 1 and 2 (this also overwrites the 9 that was just set on row 2)

df.loc[0:2,'age'] = [3,7,1]

In [None]:
# Multiple string values: one row label, a list of columns, a list of new values
df.loc[3,['dog_name','owner_email']] = ['Link','link1@email.com']

# .at is the scalar accessor (one row label, one column label), e.g.
# df.at[3,'dog_name'] = 'Link'
# Use .loc as above when several columns are assigned at once.



In [None]:
# Rows 0-2 now carry the new ages and row 3 the new name / e-mail
df

In [None]:
# Changing values using conditionals

# Build a boolean mask for the rows to change. The value compared against
# must actually occur in the column, otherwise the mask is all False and the
# assignment below silently changes nothing. 'link1@email.com' is the address
# given to row 3 in the previous update cell.
test = (df['owner_email'] == 'link1@email.com')
# For every row where the mask is True, overwrite both columns at once
df.loc[test,['dog_name','owner_email']] = ['Momo','momo1@email.com']


# The same pattern works one column at a time:

# test = (df['dog_name'] == 'Momo')
# df.loc[test,'dog_name'] = 'Dogo'


# test = (df['dog_name'] == 'Momo')
# df.loc[test,'owner_email'] = 'dogo1@email.com'

# df

In [None]:
# Check the result of the conditional update
df

* Changing values of multiple rows

In [None]:
# Changing values in multiple rows: assign a whole new column

# .str applies the string method to every element of the column.
# Upper-casing and then lower-casing leaves the e-mails in lower case.
df['owner_email'] = df['owner_email'].str.upper()
df['owner_email'] = df['owner_email'].str.lower()

df

### 4 methods for updating data in pandas

* apply
* map
* applymap
* replace

#### apply

In [None]:
# Series.apply calls a function once per element:
# here it returns the length of every string in the column

# pass the function itself, without () - apply does the calling

df['breed'].apply(len) 

In [None]:
# simple use of apply
# a func that changes the case of string


def update_str(breed):
    """Return ``breed`` converted to upper case."""
    upper_cased = breed.upper()
    return upper_cased

# Don't call the function yourself, just pass it to apply.
# NOTE: this result is neither assigned nor displayed (not the last line)
df['breed'].apply(update_str)

# to keep the result, assign it back to the column
# df['breed'] = df['breed'].apply(update_str)

# Use a lambda when the function is simple enough to write inline

df['breed'] = df['breed'].apply(lambda x: x.lower())

df['breed']

In [None]:
# simple use of apply
# a function that adds 1 year to age


# def update_age(age):
#     return age+1


# Don't call the function, just pass it

# df['age'].apply(update_age)



# to change the values, assign the result back

# df['age'] = df['age'].apply(update_age)


# Use a lambda when the function is simple enough to write inline.
# The result is only displayed here, not assigned, so df['age'] is unchanged.

df['age'].apply(lambda x: x+1)

In [None]:
# df is unchanged: the age + 1 result above was not assigned back
df

In [None]:
# DataFrame.apply passes each whole COLUMN (a Series) to the function,
# so len gives the number of rows in each column, not string lengths
df.apply(len)

In [None]:
# Returns the minimum of every column
# (text columns are compared alphabetically)
df.apply(pd.Series.min)

In [None]:
# map substitutes only the values listed in the dict
# and turns every value that is NOT listed into NaN

# df['breed'].map({'retriver':'golden_retriver'})


# replace also substitutes only the listed values,
# but leaves all other values untouched
df['breed'] = df['breed'].replace({'retriver':'golden_retriver'})

In [None]:
# Smallest and largest value in a column

# df['age'].min()

df['age'].max()


### Add and remove columns and rows 

In [None]:
# DataFrame used in this section, rebuilt from the original dict
df = pd.DataFrame(dogs)
df

In [None]:
# Example of adding a new column

# Adding string columns joins them element-wise into a new Series.
# NOTE: this bare expression is neither stored nor displayed
df['name'] + ' ' + df['breed']

# Assigning to a column name that does not exist yet creates the column
df['dog_name_breed'] = df['name'] + ' ' + df['breed']

df

In [None]:
# Deleting a column (inplace=True modifies df directly)
df.drop(columns='dog_name_breed',inplace=True)

In [None]:
# Split one text column into two columns

# Split each value on the space character.
# expand=True returns a DataFrame whose columns are named 0 and 1
# df['to_expand'].str.split(' ',expand=True)


# Assign the two result columns to new names (1col, 2col) in the original df
# df[['1col','2col']] = df['to_expand'].str.split(' ',expand=True)

* Add rows to a DataFrame 

In [None]:
# Adding rows with pd.concat

# DataFrame.append was deprecated in pandas 1.4 and removed in pandas 2.0
# (calling it raises AttributeError), so a new row is wrapped in a
# one-row DataFrame and concatenated onto df instead.

# ignore_index=True renumbers the rows 0..n-1 instead of keeping each
# frame's own labels; sort=False keeps the existing column order.

# temp: the result is not assigned, so df itself stays unchanged.
# Columns missing from the new row are filled with NaN.
pd.concat([df, pd.DataFrame([{'name':'Kalay'}])], ignore_index=True, sort=False)

# add a whole row (assign the result back to df to keep it)
df = pd.concat(
    [df, pd.DataFrame([{'name':'Tonka', 'breed':'Symoid', 'color':'White','age':3, 'owner_email':'tonka1@email.com'}])],
    ignore_index=True,
    sort=False,
)

In [None]:
# Joining two DataFrames (df1, df2 are placeholders)

# Historic API - DataFrame.append was removed in pandas 2.0:
# Temp changes
# df1.append(df2, ignore_index=True)

# To get rid of the column-sorting warning
# df1.append(df2, ignore_index=True, sort = False)


# Perm changes
# df1 = df1.append(df2, ignore_index=True, sort = False)

# Better (and, on current pandas, the only) way of doing the same thing
# pd.concat([df1,df2], ignore_index=True)



In [None]:
# Frame including the newly added 'Tonka' row
df

In [None]:
# Dropping rows: use a filter mask to find the index labels to remove


filt = (df['name'] == 'Tonka')
# df.loc[filt]

# df[filt].index holds the labels of the matching rows; drop removes them in place
df.drop(index = df[filt].index,inplace = True)

In [None]:
# The 'Tonka' row has been removed
df

### Sorting data

In [None]:
# Working DataFrame for the sorting examples, rebuilt from the original dict
df = pd.DataFrame(dogs)

# Add a new row.
# DataFrame.append was removed in pandas 2.0, so the row is wrapped in a
# one-row DataFrame and concatenated; ignore_index=True renumbers rows 0..n-1.
df = pd.concat(
    [df, pd.DataFrame([{'name':'Tonka', 'breed':'Symoid', 'color':'White','age':3, 'owner_email':'tonka1@email.com'}])],
    ignore_index=True,
    sort=False,
)

df

In [None]:
# Normal sort (returns a sorted copy, df itself is not changed)

# Ascending order
# NOTE: only the last expression of a cell is displayed, so this result is not shown
df.sort_values(by='age')

# Descending order
df.sort_values(by='age',ascending=False)

In [None]:
# Sorting by multiple columns


# Sort by breed first; name is used to order rows that share a breed
df.sort_values(by=['breed','name'])

# A separate direction per column: breed ascending, name descending
# df.sort_values(by=['breed','name'],ascending=[True,False])

In [None]:
# Make the changes permanent

# Sort by breed first and then name; inplace=True reorders df itself
df.sort_values(by=['breed','name'], inplace=True)
# NOTE: not displayed, only the last expression of the cell is shown
df

# Back to the original order: sort by the index labels.
# This returns a sorted copy; df itself stays sorted by breed/name.
df.sort_index()

In [None]:
# Another way: select the column(s) first, then sort

# single column: returns just that Series, sorted
df['name'].sort_values()


# multiple columns: returns only the selected columns, sorted by breed
# df[['breed','name']].sort_values(by='breed')