## Customization with Pandas


In [None]:
import pandas as pd

In [None]:
# Read the SAFI survey CSV from the shared workshop folder into a DataFrame.
# NOTE(review): the path looks like a Google Drive mount inside Colab --
# confirm that Drive is mounted before this cell is run.
safi_csv_path = '/content/drive/MyDrive/TRIADS_workshops/workshop_data/SAFI.csv'
df_SAFI = pd.read_csv(safi_csv_path)

### Basic Customization with Pandas

In [None]:
# Baseline scatterplot drawn with pandas defaults:
# the 'years_liv' column on the x axis against 'years_farm' on the y axis.

df_SAFI.plot(kind='scatter', x='years_liv', y='years_farm')

In [None]:
# Replace the automatically chosen axis ranges.
# xlim and ylim each take a (lower, upper) pair.

df_SAFI.plot(
    kind='scatter',
    x='years_liv',
    y='years_farm',
    xlim=(0, 120),
    ylim=(0, 80),
)

In [None]:
# Changing the size of the figure.
# figsize is a (width, height) pair.

df_SAFI.plot(
    kind='scatter',
    x='years_liv',
    y='years_farm',
    figsize=(10, 8),
)

In [None]:
# Give the figure a title and replace the default axis labels
# (which would otherwise be the column names).

df_SAFI.plot(
    kind='scatter',
    x='years_liv',
    y='years_farm',
    title="My Scatterplot!",
    xlabel="Years: Lived",
    ylabel="Years: Farmed",
)

In [None]:
# Restyle the markers: `color` sets the dot colour, `s` sets the dot size.

df_SAFI.plot(
    kind='scatter',
    x='years_liv',
    y='years_farm',
    color='teal',
    s=80,
)

In [None]:
# All of the previous customizations in a single call:
# axis limits, marker style, figure size, title and axis labels.

df_SAFI.plot(
    kind='scatter',
    x='years_liv',
    y='years_farm',
    # axis ranges
    xlim=(0, 120),
    ylim=(0, 80),
    # marker appearance
    color='teal',
    s=80,
    # figure size
    figsize=(10, 8),
    # text
    title="My Scatterplot!",
    xlabel="Years: Lived",
    ylabel="Years: Farmed",
)


## Exercise One

Using the documentation for plotting in pandas:

https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.plot.html#

1. Make a line plot that graphs the change in GDP of France and Germany (using your gdp_europe.csv), where the France line is pink and the Germany line is purple.

2. Create a horizontal bar chart of Spain's GDP for each year in the dataset. Make the bars orange, include gridlines to make the graph easier to read, insert the title "Spain GDP", and label the x axis "US Dollars". (Bonus part: can you make the bars display from lowest to highest GDP?)


In [None]:
# Load the European GDP table; the first CSV column becomes the row index.
# NOTE(review): the .loc lookups below assume that index holds country names
# and that the remaining columns are years -- confirm against the CSV.

df_europe = pd.read_csv(
    '/content/drive/MyDrive/TRIADS_workshops/workshop_data/gdp_europe.csv',
    index_col=0,
)

# Part 1: France and Germany Graph

# Keep only the two rows of interest.
df_fr_gr = df_europe.loc[["France", "Germany"]]

# Transpose so that each country becomes a column (pandas draws one line per
# column), then assign a line colour to each column by name.
df_fr_gr.T.plot.line(color={"France": "pink", "Germany": "purple"})

In [None]:
# Part 2: Spain Graph
# Select the "Spain" row as a Series and draw it as horizontal bars,
# one bar per entry in that row.

df_europe.loc["Spain"].plot.barh(
    color="orange",
    grid=True,
    title="Spain GDP",
    xlabel="US Dollars",
)

In [None]:
# Bonus: Spain Graph in reverse order
# Reorder the columns (axis=1) by the values found in the "Spain" row,
# largest first. A horizontal bar chart places the first entry at the bottom,
# so the smallest value ends up at the top of the chart.

df_sorted = df_europe.sort_values(by="Spain", axis=1, ascending=False)

df_sorted.loc["Spain"].plot.barh(
    color="orange",
    grid=True,
    title="Spain GDP",
    xlabel="US Dollars",
)



## Further Customization with Matplotlib

In [None]:
import pandas as pd
import matplotlib.pyplot as plt

In [None]:
# Draw the plot with pandas, then decorate it through matplotlib's pyplot
# interface. The plt.* calls act on the most recently created axes, i.e. the
# scatterplot produced on the line above them.

df_SAFI.plot(kind='scatter', x='years_liv', y='years_farm')

plt.title('Years Lived vs Years Farming')   # text above the plot
plt.xlabel('Years Lived')                   # horizontal axis label
plt.ylabel('Years Farming')                 # vertical axis label

In [None]:
# Creating a scatterplot styled through pandas arguments (marker color,
# marker size, figure size), then adding the title and axis labels with matplotlib

df_SAFI.plot.scatter(x = 'years_liv', y = 'years_farm', color='pink', s=40, figsize=(14,10))

# The plt.* calls apply to the axes created by the plot call above.
plt.title('Years Lived vs Years Farming')
plt.xlabel('Years Lived')
plt.ylabel('Years Farming')



In [None]:
# Basic pandas boxplot: one box of 'buildings_in_compound' values
# for each distinct value in the 'village' column.

df_SAFI.boxplot(
    column='buildings_in_compound',
    by='village',
    figsize=(5, 5),
)

In [None]:
# Creating a boxplot with pandas, and customizing it with matplotlib

df_SAFI.boxplot(column = 'buildings_in_compound', by = 'village', figsize = (5,5))

plt.suptitle(None) #removes automatic title
plt.title('Buildings in compounds per village')
plt.ylabel('Number of buildings')
plt.xlabel(None)  # clear the x-axis label
# Switch the grid off explicitly. plt.grid(None) only *toggles* the current
# grid state, so it would turn the grid back on if it were already off.
plt.grid(False)

In [None]:
# Creating boxplot, customizing it with matplotlib, and saving it to a file!

df_SAFI.boxplot(column = 'buildings_in_compound', by = 'village')

plt.suptitle(None)  # removes automatic title
plt.title('Buildings in compounds per village')
plt.ylabel('Number of buildings')
plt.xlabel(None)  # clear the x-axis label
# Switch the grid off explicitly. plt.grid(None) only *toggles* the current
# grid state, so it would turn the grid back on if it were already off.
plt.grid(False)

#Saving graph as a pdf or png file
# savefig must run in the same cell as the plotting code so that it writes
# the figure that was just drawn; the output format follows the file extension.
plt.savefig('safi_boxplot_buildings.pdf')   # save as pdf file
plt.savefig('safi_boxplot_buildings.png', dpi = 150)  # save as png file, some extra arguments are provided

In [None]:
# Scatterplot of the GPS columns where each dot's colour is taken from a third
# column: passing a column name to `c` colours the points by that column's
# values and adds a colorbar.

df_SAFI.plot(
    kind='scatter',
    x='gps:Latitude',
    y='gps:Longitude',
    c='gps:Altitude',
    figsize=(4, 4),
)

In [None]:
# Same altitude-coloured scatterplot, but with the colour scale chosen
# explicitly: `cmap` names one of matplotlib's built-in colour maps.

df_SAFI.plot(
    kind='scatter',
    x='gps:Latitude',
    y='gps:Longitude',
    c='gps:Altitude',
    cmap="viridis",
    figsize=(4, 4),
)



In [None]:
# Altitude-coloured scatterplot using a hand-built colour map instead of a
# built-in one.

from matplotlib.colors import ListedColormap

# The colours that make up the custom map, in order.
colors = ["blue", "green", "orange", "yellow"]

# Wrap the list in a colormap object that can be passed as `cmap`.
custom_colors = ListedColormap(colors)

df_SAFI.plot(
    kind='scatter',
    x='gps:Latitude',
    y='gps:Longitude',
    c='gps:Altitude',
    cmap=custom_colors,
    figsize=(4, 4),
)

## Exercise Two

Create a bar graph that visualizes the average number of rooms per farm for each wall type in SAFI.csv, and set the bar colors to pink, red, orange, and green. (Bonus: how can you make sure the cement bar is green?)

In [None]:
# Solution: one bar per wall type showing the mean of the 'rooms' column,
# coloured pink, red, orange and green.

# Colours are matched to bars by position, in the order of rooms_mean.index.
# NOTE(review): "green" is listed second, presumably so that it lands on the
# cement bar -- verify against rooms_mean.index.
colors = ["pink", "green", "red", "orange"]

# Average number of rooms for each distinct 'respondent_wall_type'.
rooms_mean = (
    df_SAFI
    .groupby('respondent_wall_type')['rooms']
    .mean()
)

rooms_mean.plot(kind='bar', color=colors)

In [None]:
# Alternative solution to the above, but ensuring that the cement bar will be green, regardless of list order

# Palette for every wall type other than cement.
colors_list = ["pink", "red", "orange"]

# Build one colour per bar, in the same order as the bars (rooms_mean.index).
colors = []
i = 0  # how many non-cement colours have been handed out so far
for wall in rooms_mean.index:
    if wall == 'cement':
        colors.append('green')
    else:
        # Wrap around the palette so that having more non-cement wall types
        # than palette entries reuses colours instead of raising IndexError.
        colors.append(colors_list[i % len(colors_list)])
        i += 1

rooms_mean.plot.bar(color=colors)



In [None]:
# A third variant: let itertools.cycle hand out the non-cement colours.

from itertools import cycle

# An endless iterator over the palette; next() yields pink, red, orange, pink, ...
other_colors = cycle(["pink", "red", "orange"])

# One colour per bar, in index order: cement is always green, every other
# wall type takes the next colour from the cycle.
colors = [
    'green' if wall == 'cement' else next(other_colors)
    for wall in rooms_mean.index
]

rooms_mean.plot(kind='bar', color=colors)
