In [1]:
import numpy as np
import pandas as pd
import os
import seaborn as sns
import psycopg2
import matplotlib.pyplot as plt

from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.feature_selection import RFE
from sklearn.linear_model import LogisticRegression
from statsmodels.formula.api import ols, logit

from sklearn.metrics import mean_squared_error
from sklearn.metrics import r2_score
from sklearn.preprocessing import PolynomialFeatures
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import confusion_matrix, plot_confusion_matrix, classification_report
from statsmodels.graphics.mosaicplot import mosaic
from password import password

In [2]:
#create and examine dataframe
crisis_df = pd.read_csv("crisis_data.csv")
crisis_df

Unnamed: 0,crisis_index,crisis_name,crisis_iso3,figure_name,figure_source,figure_value,figure_date,figure_url
0,1,Afghanistan,AFG,People in Need,OCHA,24400000,2021-12-02,https://reliefweb.int/node/3796720
1,1,Afghanistan,AFG,People Targeted for Assistance,OCHA,22100000,2021-12-02,https://reliefweb.int/node/3796720
2,1,Afghanistan,AFG,Children in Need,UNICEF,12900000,2021-12-07,https://reliefweb.int/node/3798486
3,1,Afghanistan,AFG,People in Food Crisis/Emergency (IPC phase 3+),IPC,18844000,2021-10-25,https://reliefweb.int/node/3785295
4,1,Afghanistan,AFG,Undocumented Afghan Returnees from Iran (per y...,IOM,858956,2021-09-09,https://reliefweb.int/node/3775407
...,...,...,...,...,...,...,...,...
616,30,Zimbabwe,ZWE,Refugees & Asylum-Seekers in Zimbabwe,UNHCR,22600,2021-08-31,https://reliefweb.int/node/3778376
617,30,Zimbabwe,ZWE,DRC Refugees in Zimbabwe,UNHCR,12020,2021-08-31,https://reliefweb.int/node/3778376
618,30,Zimbabwe,ZWE,Mozambican Asylum-Seekers in Zimbabwe,UNHCR,8157,2021-08-31,https://reliefweb.int/node/3778376
619,30,Zimbabwe,ZWE,People Received WFP Food Assistance (per month),WFP,859182,2021-11-30,https://reliefweb.int/node/3802533


In [3]:
#check for NaN or missing values
crisis_df.isnull().sum()

crisis_index     0
crisis_name      0
crisis_iso3      0
figure_name      0
figure_source    0
figure_value     0
figure_date      0
figure_url       0
dtype: int64

In [4]:
#replace commas in data so it won't mess with the data
crisis_df["figure_name"] = crisis_df["figure_name"].str.replace(",","")
crisis_df

Unnamed: 0,crisis_index,crisis_name,crisis_iso3,figure_name,figure_source,figure_value,figure_date,figure_url
0,1,Afghanistan,AFG,People in Need,OCHA,24400000,2021-12-02,https://reliefweb.int/node/3796720
1,1,Afghanistan,AFG,People Targeted for Assistance,OCHA,22100000,2021-12-02,https://reliefweb.int/node/3796720
2,1,Afghanistan,AFG,Children in Need,UNICEF,12900000,2021-12-07,https://reliefweb.int/node/3798486
3,1,Afghanistan,AFG,People in Food Crisis/Emergency (IPC phase 3+),IPC,18844000,2021-10-25,https://reliefweb.int/node/3785295
4,1,Afghanistan,AFG,Undocumented Afghan Returnees from Iran (per y...,IOM,858956,2021-09-09,https://reliefweb.int/node/3775407
...,...,...,...,...,...,...,...,...
616,30,Zimbabwe,ZWE,Refugees & Asylum-Seekers in Zimbabwe,UNHCR,22600,2021-08-31,https://reliefweb.int/node/3778376
617,30,Zimbabwe,ZWE,DRC Refugees in Zimbabwe,UNHCR,12020,2021-08-31,https://reliefweb.int/node/3778376
618,30,Zimbabwe,ZWE,Mozambican Asylum-Seekers in Zimbabwe,UNHCR,8157,2021-08-31,https://reliefweb.int/node/3778376
619,30,Zimbabwe,ZWE,People Received WFP Food Assistance (per month),WFP,859182,2021-11-30,https://reliefweb.int/node/3802533


In [5]:
crisis_df.to_csv('crisis_data.csv', index= False)

In [21]:
#in PostGresQL create a new database named project_2 to add new table to, and use psycopg2 to connect to it
conn = psycopg2.connect(dbname='project_2', user='postgres', password=password)
#create a cursor to perform database operations
cursor= conn.cursor()

In [22]:
#create query to create a table
cursor.execute("DROP TABLE IF EXISTS crisis_main")
query=("""CREATE TABLE crisis_main(
	crisis_index integer,
	crisis_name text,
	crisis_iso3 text,
	figure_name text,
	figure_source text,
	figure_value integer,
	figure_date varchar,
	figure_url varchar
    )""")
#execute query
cursor.execute(query)

In [23]:
#use copy from to input values from csv into postgres table
with open('crisis_data.csv', 'r') as f:
    next(f) # Skip the header row.
    cursor.copy_from(f, 'crisis_main', sep=',')

In [24]:
#commit the table
conn.commit()

In [27]:
cursor.execute("SELECT * FROM crisis_main")
cursor.fetchall()

[(1,
  'Afghanistan',
  'AFG',
  'People in Need',
  'OCHA',
  24400000,
  '2021-12-02',
  'https://reliefweb.int/node/3796720'),
 (1,
  'Afghanistan',
  'AFG',
  'People Targeted for Assistance',
  'OCHA',
  22100000,
  '2021-12-02',
  'https://reliefweb.int/node/3796720'),
 (1,
  'Afghanistan',
  'AFG',
  'Children in Need',
  'UNICEF',
  12900000,
  '2021-12-07',
  'https://reliefweb.int/node/3798486'),
 (1,
  'Afghanistan',
  'AFG',
  'People in Food Crisis/Emergency (IPC phase 3+)',
  'IPC',
  18844000,
  '2021-10-25',
  'https://reliefweb.int/node/3785295'),
 (1,
  'Afghanistan',
  'AFG',
  'Undocumented Afghan Returnees from Iran (per year)',
  'IOM',
  858956,
  '2021-09-09',
  'https://reliefweb.int/node/3775407'),
 (1,
  'Afghanistan',
  'AFG',
  'Undocumented Afghan Returnees from Pakistan (per year)',
  'IOM',
  7933,
  '2021-09-09',
  'https://reliefweb.int/node/3775407'),
 (1,
  'Afghanistan',
  'AFG',
  'Conflict-Induced Displacements (per year)',
  'OCHA',
  677000,
  '