In [1]:
import numpy as np
import pandas as pd
import os
import seaborn as sns
import psycopg2
import matplotlib.pyplot as plt

from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.feature_selection import RFE
from sklearn.linear_model import LogisticRegression
from statsmodels.formula.api import ols, logit

from sklearn.metrics import mean_squared_error
from sklearn.metrics import r2_score
from sklearn.preprocessing import PolynomialFeatures
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import confusion_matrix, plot_confusion_matrix, classification_report
from statsmodels.graphics.mosaicplot import mosaic
from password import password

In [2]:
# Read the crisis figures file into a DataFrame and display it for a first look.
crisis_df = pd.read_csv(filepath_or_buffer="crisis_data.csv")
crisis_df

Unnamed: 0,crisis_index,crisis_name,crisis_iso3,figure_name,figure_source,figure_value,figure_date,figure_url
0,1,Afghanistan,AFG,People in Need,OCHA,24400000,2021-12-02,https://reliefweb.int/node/3796720
1,1,Afghanistan,AFG,People Targeted for Assistance,OCHA,22100000,2021-12-02,https://reliefweb.int/node/3796720
2,1,Afghanistan,AFG,Children in Need,UNICEF,12900000,2021-12-07,https://reliefweb.int/node/3798486
3,1,Afghanistan,AFG,People in Food Crisis/Emergency (IPC phase 3+),IPC,18844000,2021-10-25,https://reliefweb.int/node/3785295
4,1,Afghanistan,AFG,Undocumented Afghan Returnees from Iran (per y...,IOM,858956,2021-09-09,https://reliefweb.int/node/3775407
...,...,...,...,...,...,...,...,...
616,30,Zimbabwe,ZWE,Refugees & Asylum-Seekers in Zimbabwe,UNHCR,22600,2021-08-31,https://reliefweb.int/node/3778376
617,30,Zimbabwe,ZWE,DRC Refugees in Zimbabwe,UNHCR,12020,2021-08-31,https://reliefweb.int/node/3778376
618,30,Zimbabwe,ZWE,Mozambican Asylum-Seekers in Zimbabwe,UNHCR,8157,2021-08-31,https://reliefweb.int/node/3778376
619,30,Zimbabwe,ZWE,People Received WFP Food Assistance (per month),WFP,859182,2021-11-30,https://reliefweb.int/node/3802533


In [3]:
# Count missing entries per column (isna is the canonical name for isnull).
crisis_df.isna().sum()

crisis_index     0
crisis_name      0
crisis_iso3      0
figure_name      0
figure_source    0
figure_value     0
figure_date      0
figure_url       0
dtype: int64

In [4]:
# Remove commas from figure_name so the comma-separated file that is later
# loaded into Postgres is not split inside this field.
# regex=False pins literal matching: the default of `regex` differs between
# pandas versions, and older versions warn on single-character patterns.
crisis_df["figure_name"] = crisis_df["figure_name"].str.replace(",", "", regex=False)
crisis_df

Unnamed: 0,crisis_index,crisis_name,crisis_iso3,figure_name,figure_source,figure_value,figure_date,figure_url
0,1,Afghanistan,AFG,People in Need,OCHA,24400000,2021-12-02,https://reliefweb.int/node/3796720
1,1,Afghanistan,AFG,People Targeted for Assistance,OCHA,22100000,2021-12-02,https://reliefweb.int/node/3796720
2,1,Afghanistan,AFG,Children in Need,UNICEF,12900000,2021-12-07,https://reliefweb.int/node/3798486
3,1,Afghanistan,AFG,People in Food Crisis/Emergency (IPC phase 3+),IPC,18844000,2021-10-25,https://reliefweb.int/node/3785295
4,1,Afghanistan,AFG,Undocumented Afghan Returnees from Iran (per y...,IOM,858956,2021-09-09,https://reliefweb.int/node/3775407
...,...,...,...,...,...,...,...,...
616,30,Zimbabwe,ZWE,Refugees & Asylum-Seekers in Zimbabwe,UNHCR,22600,2021-08-31,https://reliefweb.int/node/3778376
617,30,Zimbabwe,ZWE,DRC Refugees in Zimbabwe,UNHCR,12020,2021-08-31,https://reliefweb.int/node/3778376
618,30,Zimbabwe,ZWE,Mozambican Asylum-Seekers in Zimbabwe,UNHCR,8157,2021-08-31,https://reliefweb.int/node/3778376
619,30,Zimbabwe,ZWE,People Received WFP Food Assistance (per month),WFP,859182,2021-11-30,https://reliefweb.int/node/3802533


In [5]:
# Write the cleaned frame back out without the index column.
# NOTE: this is the same path the frame was read from, so the file on disk is replaced.
crisis_df.to_csv(path_or_buf='crisis_data.csv', index=False)

In [21]:
# Prerequisite: a database named project_2 must already exist in PostgreSQL;
# the new tables are added to it.
conn = psycopg2.connect(
    user='postgres',
    password=password,
    dbname='project_2',
)
# Cursor through which the SQL statements below are issued.
cursor = conn.cursor()

In [22]:
# Start from a clean slate so the cell can be re-run: drop any earlier copy first.
cursor.execute("DROP TABLE IF EXISTS crisis_main")

# DDL for the main table; the columns mirror the columns of crisis_df.
query = (
    "CREATE TABLE crisis_main(\n"
    "\tcrisis_index integer,\n"
    "\tcrisis_name text,\n"
    "\tcrisis_iso3 text,\n"
    "\tfigure_name text,\n"
    "\tfigure_source text,\n"
    "\tfigure_value integer,\n"
    "\tfigure_date varchar,\n"
    "\tfigure_url varchar\n"
    "    )"
)
cursor.execute(query)

In [23]:
# Bulk-load crisis_data.csv into crisis_main.
# COPY's CSV format is used instead of copy_from's text format: the file was
# written by pandas as real CSV, so quoted fields (embedded commas or quotes)
# must be parsed as CSV rather than split on every comma.
# HEADER true makes the server skip the header row.
# The file is opened explicitly as UTF-8 (the pandas to_csv default) so
# non-ASCII text is not decoded with a platform-specific default encoding.
with open('crisis_data.csv', 'r', encoding='utf-8') as f:
    cursor.copy_expert(
        "COPY crisis_main FROM STDIN WITH (FORMAT csv, HEADER true)",
        f,
    )

In [24]:
# Commit the open transaction so the crisis_main table and its loaded rows are persisted.
conn.commit()

In [39]:
# Sum figure_value within each crisis_index and show the five largest totals.
(
    crisis_df
    .groupby(by='crisis_index')['figure_value']
    .sum()
    .nlargest(n=5)
)

crisis_index
28    96649039
9     96371805
10    95984105
1     81186300
25    80722069
Name: figure_value, dtype: int64

In [41]:
# Group the rows by crisis name once; each per-country extract is taken from this grouping.
grouped = crisis_df.groupby(by=crisis_df['crisis_name'])

# First extract: every row recorded for Yemen.
first_df = grouped.get_group('Yemen')
first_df

Unnamed: 0,crisis_index,crisis_name,crisis_iso3,figure_name,figure_source,figure_value,figure_date,figure_url
575,28,Yemen,YEM,People in Need,OCHA,20700000,2021-12-02,https://reliefweb.int/node/3796720
576,28,Yemen,YEM,People Targeted for Assistance,OCHA,16000000,2021-12-02,https://reliefweb.int/node/3796720
577,28,Yemen,YEM,People in Acute Need,OCHA,12100000,2021-02-21,https://reliefweb.int/node/3715186
578,28,Yemen,YEM,Children in Need,UNICEF,12400000,2020-12-14,https://reliefweb.int/node/3697063
579,28,Yemen,YEM,People in Food Crisis/Emergency (IPC phase 3+),IPC,13500000,2020-12-03,https://reliefweb.int/node/3693940
580,28,Yemen,YEM,Acutely Malnourished Children,IPC,2254663,2021-02-12,https://reliefweb.int/node/3712799
581,28,Yemen,YEM,Acutely Malnourished Pregnant and Lactating Women,IPC,1155653,2021-02-12,https://reliefweb.int/node/3712799
582,28,Yemen,YEM,Children IDPs,UNICEF,2000000,2021-09-30,https://reliefweb.int/node/3790816
583,28,Yemen,YEM,People Displaced in/to Ma’rib Governorate (sin...,OCHA,64450,2021-12-01,https://reliefweb.int/node/3796676
584,28,Yemen,YEM,Displaced Households (per year),UNHCR,26227,2022-01-01,https://reliefweb.int/node/3805338


In [42]:
# Save the Yemen rows to their own file, without the index column.
first_df.to_csv(path_or_buf='first_data.csv', index=False)

In [43]:
# Recreate first_crisis from scratch so the cell can be re-run.
cursor.execute("DROP TABLE IF EXISTS first_crisis")

# Same column layout as the main crisis table.
query = (
    "CREATE TABLE first_crisis(\n"
    "\tcrisis_index integer,\n"
    "\tcrisis_name text,\n"
    "\tcrisis_iso3 text,\n"
    "\tfigure_name text,\n"
    "\tfigure_source text,\n"
    "\tfigure_value integer,\n"
    "\tfigure_date varchar,\n"
    "\tfigure_url varchar\n"
    "    )"
)
cursor.execute(query)

In [44]:
# Bulk-load first_data.csv into first_crisis.
# COPY's CSV format (not copy_from's text format) is used so that quoted
# fields written by pandas are parsed as CSV; HEADER true skips the header row.
# The file is read as UTF-8, matching the pandas to_csv default.
with open('first_data.csv', 'r', encoding='utf-8') as f:
    cursor.copy_expert(
        "COPY first_crisis FROM STDIN WITH (FORMAT csv, HEADER true)",
        f,
    )

In [45]:
# Commit the open transaction so first_crisis and its loaded rows are persisted.
conn.commit()

In [47]:
# Same steps for the next country: pull its rows from the grouping and
# save them without the index column.
second_df = grouped.get_group('Democratic Republic of the Congo')
second_df.to_csv(path_or_buf='second_data.csv', index=False)


In [48]:
# Recreate second_crisis from scratch so the cell can be re-run.
cursor.execute("DROP TABLE IF EXISTS second_crisis")

# Same column layout as the main crisis table.
query2 = (
    "CREATE TABLE second_crisis(\n"
    "\tcrisis_index integer,\n"
    "\tcrisis_name text,\n"
    "\tcrisis_iso3 text,\n"
    "\tfigure_name text,\n"
    "\tfigure_source text,\n"
    "\tfigure_value integer,\n"
    "\tfigure_date varchar,\n"
    "\tfigure_url varchar\n"
    "    )"
)
cursor.execute(query2)

In [49]:
# Bulk-load second_data.csv into second_crisis.
# COPY's CSV format (not copy_from's text format) is used so that quoted
# fields written by pandas are parsed as CSV; HEADER true skips the header row.
# The file is read as UTF-8, matching the pandas to_csv default.
with open('second_data.csv', 'r', encoding='utf-8') as f:
    cursor.copy_expert(
        "COPY second_crisis FROM STDIN WITH (FORMAT csv, HEADER true)",
        f,
    )

# Persist the new table and its rows.
conn.commit()

In [50]:
# Third extract: the Ethiopia rows, saved without the index column.
third_df = grouped.get_group('Ethiopia')
third_df.to_csv(path_or_buf='third_data.csv', index=False)

In [51]:
# Recreate third_crisis from scratch so the cell can be re-run.
cursor.execute("DROP TABLE IF EXISTS third_crisis")

# Same column layout as the main crisis table.
query3 = (
    "CREATE TABLE third_crisis(\n"
    "\tcrisis_index integer,\n"
    "\tcrisis_name text,\n"
    "\tcrisis_iso3 text,\n"
    "\tfigure_name text,\n"
    "\tfigure_source text,\n"
    "\tfigure_value integer,\n"
    "\tfigure_date varchar,\n"
    "\tfigure_url varchar\n"
    "    )"
)
cursor.execute(query3)

In [52]:
# Bulk-load third_data.csv into third_crisis.
# COPY's CSV format (not copy_from's text format) is used so that quoted
# fields written by pandas are parsed as CSV; HEADER true skips the header row.
# The file is read as UTF-8, matching the pandas to_csv default.
with open('third_data.csv', 'r', encoding='utf-8') as f:
    cursor.copy_expert(
        "COPY third_crisis FROM STDIN WITH (FORMAT csv, HEADER true)",
        f,
    )

# Persist the new table and its rows.
conn.commit()

In [53]:
# Fourth extract: the Afghanistan rows, saved without the index column.
fourth_df = grouped.get_group('Afghanistan')
fourth_df.to_csv(path_or_buf='fourth_data.csv', index=False)

In [54]:
# Recreate fourth_crisis from scratch so the cell can be re-run.
cursor.execute("DROP TABLE IF EXISTS fourth_crisis")

# Same column layout as the main crisis table.
query4 = (
    "CREATE TABLE fourth_crisis(\n"
    "\tcrisis_index integer,\n"
    "\tcrisis_name text,\n"
    "\tcrisis_iso3 text,\n"
    "\tfigure_name text,\n"
    "\tfigure_source text,\n"
    "\tfigure_value integer,\n"
    "\tfigure_date varchar,\n"
    "\tfigure_url varchar\n"
    "    )"
)
cursor.execute(query4)

In [55]:
# Bulk-load fourth_data.csv into fourth_crisis.
# COPY's CSV format (not copy_from's text format) is used so that quoted
# fields written by pandas are parsed as CSV; HEADER true skips the header row.
# The file is read as UTF-8, matching the pandas to_csv default.
with open('fourth_data.csv', 'r', encoding='utf-8') as f:
    cursor.copy_expert(
        "COPY fourth_crisis FROM STDIN WITH (FORMAT csv, HEADER true)",
        f,
    )

# Persist the new table and its rows.
conn.commit()

In [56]:
# Fifth extract: the Syria rows, saved without the index column.
fifth_df = grouped.get_group('Syria')
fifth_df.to_csv(path_or_buf='fifth_data.csv', index=False)

In [57]:
# Recreate fifth_crisis from scratch so the cell can be re-run.
cursor.execute("DROP TABLE IF EXISTS fifth_crisis")

# Same column layout as the main crisis table.
query5 = (
    "CREATE TABLE fifth_crisis(\n"
    "\tcrisis_index integer,\n"
    "\tcrisis_name text,\n"
    "\tcrisis_iso3 text,\n"
    "\tfigure_name text,\n"
    "\tfigure_source text,\n"
    "\tfigure_value integer,\n"
    "\tfigure_date varchar,\n"
    "\tfigure_url varchar\n"
    "    )"
)
cursor.execute(query5)

In [58]:
# Bulk-load fifth_data.csv into fifth_crisis.
# COPY's CSV format (not copy_from's text format) is used so that quoted
# fields written by pandas are parsed as CSV; HEADER true skips the header row.
# The file is read as UTF-8, matching the pandas to_csv default.
with open('fifth_data.csv', 'r', encoding='utf-8') as f:
    cursor.copy_expert(
        "COPY fifth_crisis FROM STDIN WITH (FORMAT csv, HEADER true)",
        f,
    )

# Persist the new table and its rows.
conn.commit()