# Chapter 1

## Inner Join

In [None]:
# The ward data: load the ward offices table, then preview its first rows and shape
import pandas as pd

wards = pd.read_csv('Ward_Offices.csv')
print(wards.head())
print(wards.shape)

# The census data is a separate table and must come from its own file; reading
# 'Ward_Offices.csv' a second time would make `census` a copy of `wards`, so the
# merges below would join the ward table to itself.
# NOTE(review): file name assumed to be 'Ward_Census.csv' -- confirm against the data folder.
census = pd.read_csv('Ward_Census.csv')
print(census.head())
print(census.shape)

In [None]:
# Inner join (merge's default): pair up rows of wards and census that share a 'ward' value
wards_census = wards.merge(
    census,
    on='ward',
)
# Preview the first four merged rows, then the (rows, columns) shape
print(wards_census.head(4), wards_census.shape, sep='\n')

In [None]:
# Suffixes: list the merged column labels to check for suffixed (overlapping) names
merged_columns = wards_census.columns
print(merged_columns)

In [None]:
# Repeat the join on 'ward', labelling clashing column names with '_ward' / '_cen'
wards_census = wards.merge(
    census,
    on='ward',
    suffixes=('_ward', '_cen'),
)
# Preview the first four merged rows, then the (rows, columns) shape
print(wards_census.head(4), wards_census.shape, sep='\n')

In [None]:
# Join taxi_owners to taxi_veh on 'vid'; '_own' / '_veh' mark any clashing column names
taxi_own_veh = taxi_owners.merge(
    taxi_veh,
    on='vid',
    suffixes=('_own', '_veh'),
)

# Tally rows per fuel_type; value_counts lists the most frequent value first
fuel_type_counts = taxi_own_veh['fuel_type'].value_counts()
print(fuel_type_counts)

## One-to-many relationships

In [None]:
# Join wards to licenses on 'ward'; a ward row repeats once for every license it matches
ward_licenses = wards.merge(
    licenses,
    on='ward',
    suffixes=('_ward', '_lic'),
)
print(ward_licenses.head())

In [None]:
# Join the licenses and biz_owners tables through their shared 'account' column
licenses_owners = licenses.merge(biz_owners, on='account')

# Count the accounts under each title
title_groups = licenses_owners.groupby('title')
counted_df = title_groups.agg({'account': 'count'})

# Order the titles in descending order of account count
sorted_df = counted_df.sort_values(by='account', ascending=False)

# Show the first few rows of the sorted counts
print(sorted_df.head())

## Merging multiple DataFrames

In [None]:
import pandas as pd
import numpy as np

# Merge on zip alone, then inspect the rows produced for a single business
grants_licenses = grants.merge(licenses, on='zip')

is_reggies = grants_licenses['business'] == "REGGIE'S BAR & GRILL"
shown_columns = ['grant', 'company', 'account', 'ward', 'business']
print(grants_licenses.loc[is_reggies, shown_columns])

In [None]:
# Single merge: rows must agree on both address and zip to be paired
merge_keys = ['address', 'zip']
grants.merge(licenses, on=merge_keys)

In [None]:
# Chain two merges: grants -> licenses on address/zip, then the result -> wards on ward
grants_licenses_word = (
    grants
    .merge(licenses, on=['address', 'zip'])
    .merge(wards, on='ward', suffixes=('_bus', '_ward'))
)
grants_licenses_word.head()

In [None]:
# Results
import matplotlib.pyplot as plt

# Total the grant amounts per ward and draw them as a bar chart.
# Only the 'grant' column is selected before summing: aggregating every column
# would also try to "sum" the text columns (address, company, business, ...),
# which is wasted work and can raise a TypeError on recent pandas versions.
grants_licenses_word.groupby('ward')[['grant']].sum().plot(kind='bar', y='grant')
plt.show()

In [None]:
# Merging even more: the chaining pattern extends to any number of tables
# (df1..df4 and 'col' are placeholder names)

# Three tables: one .merge() link per additional table
(
    df1
    .merge(df2, on='col')
    .merge(df3, on='col')
)

# Four tables
(
    df1
    .merge(df2, on='col')
    .merge(df3, on='col')
    .merge(df4, on='col')
)

In [None]:
# Chain the three tables: ridership -> cal on year/month/day, then -> stations on station_id
ridership_cal_stations = (
    ridership
    .merge(cal, on=['year', 'month', 'day'])
    .merge(stations, on='station_id')
)

# Build the row filter from three conditions: month 7, 'Weekday' day type, 'Wilson' station
in_july = ridership_cal_stations['month'] == 7
on_weekday = ridership_cal_stations['day_type'] == 'Weekday'
at_wilson = ridership_cal_stations['station_name'] == 'Wilson'
filter_criteria = in_july & on_weekday & at_wilson

# Total the 'rides' values of the rows that pass the filter
print(ridership_cal_stations.loc[filter_criteria, 'rides'].sum())

In [None]:
# Chain the merges: licenses -> zip_demo on zip, then the result -> wards on ward
licenses_zip_ward = (
    licenses
    .merge(zip_demo, on='zip')
    .merge(wards, on='ward')
)

# Median income of the merged rows, one line per alderman
median_income_by_alderman = licenses_zip_ward.groupby('alderman').agg({'income': 'median'})
print(median_income_by_alderman)

In [None]:
# Chain the merges: land_use -> census on ward, then -> licenses on ward,
# with '_cen' / '_lic' marking column names that clash in the second merge
land_cen_lic = (
    land_use
    .merge(census, on='ward')
    .merge(licenses, on='ward', suffixes=('_cen', '_lic'))
)

# Count accounts for each (ward, pop_2010, vacant) combination,
# keeping the grouping keys as ordinary columns
group_keys = ['ward', 'pop_2010', 'vacant']
pop_vac_lic = land_cen_lic.groupby(group_keys, as_index=False).agg({'account': 'count'})

# Order by vacant (descending), then account and pop_2010 (both ascending)
sorted_pop_vac_lic = pop_vac_lic.sort_values(
    by=['vacant', 'account', 'pop_2010'],
    ascending=[False, True, True],
)

# Show the first few rows of the sorted result
print(sorted_pop_vac_lic.head())

# Chapter 2

## Left Join

In [None]:
# Movies table: load it, then preview the first rows and the (rows, columns) shape
movies = pd.read_csv('tmdb_movies.csv')
print(movies.head(), movies.shape, sep='\n')

In [None]:
# Tagline table: load it, then preview the first rows and the (rows, columns) shape
taglines = pd.read_csv('tmdb_taglines.csv')
print(taglines.head(), taglines.shape, sep='\n')

In [None]:
# Left join: keep every row of movies, attaching tagline data wherever the 'id' matches
movies_taglines = movies.merge(
    taglines,
    how='left',
    on='id',
)
print(movies_taglines.head(), movies_taglines.shape, sep='\n')

In [None]:
# Left join movies to financials on 'id' so that every movie row is retained
movies_financials = movies.merge(
    financials,
    how='left',
    on='id',
)

# Count the rows whose 'budget' value is missing after the join
budget_is_missing = movies_financials['budget'].isna()
number_of_missing_fin = budget_is_missing.sum()

# Report how many movies lack financials
print(number_of_missing_fin)