In [None]:
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
%matplotlib inline

## Demo One: Basic Plotting with Pandas, Matplotlib and Seaborn

In [None]:
# Load SAFI_full_shortname.csv into a DataFrame and preview its first five rows.
df = pd.read_csv(filepath_or_buffer="SAFI_full_shortname.csv")
df.head(n=5)

In [None]:
# Show every column name as a NumPy array.
df.columns.to_numpy()

In [None]:
# Display the remittance_money column on its own (all rows, one column).
df.loc[:, 'remittance_money']

In [None]:
# Histogram of the years_liv column: column values run along the x-axis.

df.loc[:, 'years_liv'].hist()


In [None]:
# Same histogram, this time asking for 20 bins (bars).

df.loc[:, 'years_liv'].hist(bins=20)


In [None]:
# Draw one histogram of the years_liv values per village.
# layout=(1, 3) arranges the panels as one row of three, figsize sets the
# overall figure size, and sharex=True makes the panels share their x-axis.

df.hist(
    column='years_liv',
    by='village',
    sharex=True,
    layout=(1, 3),
    figsize=(12, 3),
)

In [None]:
# Scatter plot of the GPS columns. The c parameter names the column used to
# color each point and cmap selects the color theme.

df.plot(
    kind='scatter',
    x='gps_Latitude',
    y='gps_Longitude',
    c='gps_Altitude',
    cmap="viridis",
    figsize=(4, 4),
)

In [None]:
# Bar graph built from aggregated data: the mean members_count for each
# respondent_roof_type.
#
# The result is named after what it holds (a mean of members_count, not of
# rooms) so it cannot be confused with the rooms mean computed in Exercise One.

members_mean = df.groupby('respondent_roof_type')['members_count'].mean()
members_mean.plot.bar(
    color="purple",
    xlabel="Roof Types",
    ylabel="Inhabitants",
    title="Inhabitants Per Roof Type",
)


In [None]:
# Box plot of buildings_in_compound with one box per village.

df.boxplot(
    column=['buildings_in_compound'],
    by='village',
    figsize=(5, 5),
)

In [None]:
# Draw the same per-village box plot with the Seaborn library, which applies
# its own styling automatically.
# `sns` comes from the import cell at the top of the notebook, so this cell and
# the later Seaborn cells no longer depend on an import buried mid-notebook.

sns.boxplot(data=df, x='village', y='buildings_in_compound')

In [None]:
# Seaborn scatter plot of years_liv against years_farm with a fitted linear
# regression; hue='village' draws a separate color and fit for each village.

sns.lmplot(
    data=df,
    x='years_farm',
    y='years_liv',
    hue='village',
)

## Exercise One

In [None]:
# Scatter plot of years_farm against years_liv, with each point colored by
# its buildings_in_compound value using the viridis color map.
df.plot.scatter(
    x='years_liv',
    y='years_farm',
    c='buildings_in_compound',
    cmap='viridis',
)

In [None]:
# Bar chart of the mean number of rooms per wall type, with the bars colored
# from the palette below.
colors = ["pink", "red", "green", "orange"]
rooms_mean = df.groupby(by='respondent_wall_type')['rooms'].mean()
rooms_mean.plot(kind='bar', color=colors)

In [None]:
# Seaborn scatter plot of liv_count against no_plots with a fitted linear
# regression line.

sns.lmplot(
    data=df,
    x='no_plots',
    y='liv_count',
)

## Demo Two: Further Customization of Plots

In [None]:
# Create a random dataset using NumPy (imported as `np` in the notebook's
# import cell).
#
# A seeded generator is used so that Restart & Run All reproduces exactly the
# same points, and therefore the same figures, on every run.
rng = np.random.default_rng(42)

# Generate some data for 2 sets of 20 points.
# First set: x and y drawn uniformly from [-0.5, 0.5).
x1 = pd.Series(rng.random(20) - 0.5)
y1 = pd.Series(rng.random(20) - 0.5)

# Second set: x and y drawn uniformly from [0.5, 1.5).
x2 = pd.Series(rng.random(20) + 0.5)
y2 = pd.Series(rng.random(20) + 0.5)



In [None]:


# Plot both sets of points on the current axes; the label given to each call
# is the text shown for it in the legend.
plt.scatter(x=x1, y=y1, c='red', label='Red Range')
plt.scatter(x=x2, y=y2, c='blue', label='Blue Range')

# Give the plot its title and its x and y axis labels.
plt.title('Scatter Plot')
plt.xlabel('Range of x values')
plt.ylabel('Range of y values')

# Add the legend in the lower-right corner.
plt.legend(loc="lower right")


In [None]:
# Adjust the size (s) and opacity (alpha) of the dots.

plt.scatter(x1, y1, c='red', label='Red Range', s=10, alpha=0.5)
plt.scatter(x2, y2, c='blue', label='Blue Range', s=100, alpha=0.5)

# The label= arguments above are only shown once a legend is drawn, and the
# figure needs a title and axis labels to be readable on its own.
plt.title('Scatter Plot')
plt.xlabel('Range of x values')
plt.ylabel('Range of y values')
plt.legend(loc="lower right")

  



In [None]:
# Box plot of buildings_in_compound per village, before any customization.
df.boxplot(
    by='village',
    column='buildings_in_compound',
    figsize=(5, 5),
)

In [None]:
# Box plot of buildings_in_compound per village with customized text and grid.
df.boxplot(column='buildings_in_compound', by='village', figsize=(5, 5))

# Empty strings state the intent (no text) explicitly instead of relying on
# how matplotlib converts None to text.
plt.suptitle('')  # remove the automatic title
plt.title('Buildings in compounds per village')
plt.ylabel('Number of buildings')
plt.xlabel('')  # remove the automatic x-axis label

# grid(None) only *toggles* grid visibility, so its result depends on the
# current state; False switches the grid off unconditionally.
plt.grid(False)

In [None]:


# Rebuild the customized box plot and write it to disk in two formats.
df.boxplot(column='buildings_in_compound', by='village')

# Empty strings state the intent (no text) explicitly instead of relying on
# how matplotlib converts None to text.
plt.suptitle('')  # remove the automatic title
plt.title('Buildings in compounds per village')
plt.ylabel('Number of buildings')
plt.xlabel('')  # remove the automatic x-axis label

# grid(None) only *toggles* grid visibility, so its result depends on the
# current state; False switches the grid off unconditionally.
plt.grid(False)

# Saving graph as a pdf or png file
plt.savefig('safi_boxplot_buildings.pdf')   # save as pdf file
plt.savefig('safi_boxplot_buildings.png', dpi=150)  # save as png file at 150 dots per inch

## Exercise Two

In [None]:
# Exercise 2 Solution
# Scatter plot of mpg against horsepower for a small hand-written table of
# cars, saved to horsepower.png.

import pandas as pd
import matplotlib.pyplot as plt

# One list per column; position i in every list describes the same car.
data = dict(
    car=['Toyota', 'Honda', 'Ford', 'Chevrolet', 'BMW', 'Tesla', 'Audi'],
    horsepower=[150, 120, 170, 200, 250, 300, 180],
    mpg=[30, 35, 25, 20, 18, 15, 22],
)

df2 = pd.DataFrame(data)

# Draw the points, label the figure, then write it to disk.
plt.scatter(x=df2['horsepower'], y=df2['mpg'], s=100, c='pink')
plt.title('Horsepower vs MPG')
plt.xlabel('Horsepower')
plt.ylabel('MPG')
plt.savefig('horsepower.png')
