In [1]:
import os
import pickle
import urllib.request

import geopandas as gpd
import numpy as np
import pandas as pd

In [2]:
def get_prio_shape(location='/home/simon/Documents/Bodies/data/PRIO'):
    """Return the PRIO-GRID shapefile as a GeoDataFrame, downloading it if needed.

    Parameters
    ----------
    location : str
        Directory in which ``priogrid_shapefiles.zip`` is stored. Defaults to
        the path originally hard-coded in this notebook.

    Returns
    -------
    geopandas.GeoDataFrame
        The grid read straight from the zip archive.
    """
    path_prio = os.path.join(location, 'priogrid_shapefiles.zip')

    if os.path.isfile(path_prio):
        print('File already downloaded')
    else:
        print('Beginning file download PRIO...')
        url_prio = 'http://file.prio.no/ReplicationData/PRIO-GRID/priogrid_shapefiles.zip'

        # urlretrieve does not create missing parent directories.
        os.makedirs(location, exist_ok=True)
        urllib.request.urlretrieve(url_prio, path_prio)

    # Both branches end with the archive on disk, so read it once here.
    return gpd.read_file('zip://' + path_prio)

In [3]:
def get_bodies_dfs(data_dir='/home/simon/Documents/Bodies/data/done_dfs/'):
    """Load the v1_0 and v1_3 pickled bodies data frames.

    Parameters
    ----------
    data_dir : str
        Directory holding ``bodies_df_2022_v1_0.pkl`` and
        ``bodies_df_2022_v1_3.pkl``. A trailing slash is optional.

    Returns
    -------
    tuple
        ``(bodies_df1, bodies_df3)``: the unpickled v1_0 and v1_3 objects.

    Notes
    -----
    ``pickle.load`` can execute arbitrary code; only point ``data_dir`` at
    files you produced yourself.
    """
    def _load(file_name):
        # Unpickle a single file from data_dir.
        with open(os.path.join(data_dir, file_name), 'rb') as file:
            return pickle.load(file)

    bodies_df1 = _load('bodies_df_2022_v1_0.pkl')
    bodies_df3 = _load('bodies_df_2022_v1_3.pkl')

    return bodies_df1, bodies_df3

In [4]:
# Load the PRIO-GRID shapefile and the two pickled versions of the bodies data.
prio_grid = get_prio_shape()
bodies_df1, bodies_df3 = get_bodies_dfs()

File already downloaded


In [5]:
# Cities whose captions are inspected below to check their location
city_list = ['Husaybah', 'Karabilah', 'Ar Rabit']

In [6]:
# Print the distinct captions stored for each city so the place can be
# identified from the text.
for city_name in city_list:

    print(city_name + '*' * 30)
    city_captions = bodies_df1.loc[bodies_df1['city'] == city_name, 'caption/abstract'].unique()
    print(city_captions)

Husaybah******************************
['Operation Steel Curtain went into neighboring Karabilah Husaybah in Anbar province. Marines went house to house in an attempt to quell the insurgency in this area close to the Syrian border.\r\rMarines in Karabilah\r\r'
 'Operation Steel Curtain in Husaybah in Anbar province. Marines went house to house in an attempt to quell the insurgency in this area close to the Syrian border.\r\rMarines pass a mound of rubble created by airstrikes in Husaybah.\r'
 'Operation Steel Curtain went into neighboring Karabilah Husaybah in Anbar province. Marines went house to house in an attempt to quell the insurgency in this area close to the Syrian border.\r\rMarines next to a white flag atop an abandoned building in Karabilah. Many have posted white flags but left anyway in light of the heavy bombardments.\r\r'
 'Operation  Steel Curtain continued on its sixth day in Husaybah in Anbar province, with Marines going house to house in an attempt to quell the insur

Conclusion: this is the western Husaybah near the Syrian border (Operation Steel Curtain), not the one near Baghdad. Change the coordinates.

In [7]:
# Coordinates currently stored for the rows labelled 'Husaybah'.
# .item() raises unless the column holds exactly one distinct value.
husaybah_rows = bodies_df3.loc[bodies_df3['city'] == 'Husaybah']
latitude_old = husaybah_rows['latitude_full'].unique().item()
longitude_old = husaybah_rows['longitude_full'].unique().item()

# Corrected coordinates, from https://en.wikipedia.org/wiki/Husaybah
# (34° 23′ 34″ N, 40° 59′ 13″ E)
latitude_new = 34.392778
longitude_new = 40.986944

In [8]:
# Select rows by their old coordinates instead of by the city label, so every
# row geocoded to the wrong point is picked up.
Husaybah_mask = bodies_df3['longitude_full'].eq(longitude_old) & bodies_df3['latitude_full'].eq(latitude_old)

husaybah_columns = ['longitude_full', 'latitude_full', 'gid', 'xcoord', 'ycoord', 'col', 'row']
Husaybah_df = bodies_df3.loc[Husaybah_mask, husaybah_columns]
Husaybah_df

Unnamed: 0,longitude_full,latitude_full,gid,xcoord,ycoord,col,row
64,43.453298,33.420684,177567,43.25,33.25,447,247
69,43.453298,33.420684,177567,43.25,33.25,447,247
70,43.453298,33.420684,177567,43.25,33.25,447,247
133,43.453298,33.420684,177567,43.25,33.25,447,247
350,43.453298,33.420684,177567,43.25,33.25,447,247
...,...,...,...,...,...,...,...
141193,43.453298,33.420684,177567,43.25,33.25,447,247
141194,43.453298,33.420684,177567,43.25,33.25,447,247
141195,43.453298,33.420684,177567,43.25,33.25,447,247
141196,43.453298,33.420684,177567,43.25,33.25,447,247


In [9]:
# Replace the old coordinates with the corrected ones. `assign` broadcasts the
# scalars to every row and returns a new frame, so nothing derived from
# bodies_df3 is mutated in place. The grid columns are still the old ones here;
# they are refreshed by the spatial join further down.
Husaybah_df = Husaybah_df.assign(longitude_full=longitude_new, latitude_full=latitude_new)
Husaybah_df

Unnamed: 0,longitude_full,latitude_full,gid,xcoord,ycoord,col,row
64,40.986944,34.392778,177567,43.25,33.25,447,247
69,40.986944,34.392778,177567,43.25,33.25,447,247
70,40.986944,34.392778,177567,43.25,33.25,447,247
133,40.986944,34.392778,177567,43.25,33.25,447,247
350,40.986944,34.392778,177567,43.25,33.25,447,247
...,...,...,...,...,...,...,...
141193,40.986944,34.392778,177567,43.25,33.25,447,247
141194,40.986944,34.392778,177567,43.25,33.25,447,247
141195,40.986944,34.392778,177567,43.25,33.25,447,247
141196,40.986944,34.392778,177567,43.25,33.25,447,247


In [10]:
# Turn the corrected coordinates into point geometries that share the CRS of
# the PRIO grid, ready for a spatial join.
husaybah_points = gpd.points_from_xy(Husaybah_df['longitude_full'], Husaybah_df['latitude_full'])
Husaybah_gdf = gpd.GeoDataFrame(
    Husaybah_df[['longitude_full', 'latitude_full']],
    geometry=husaybah_points,
).set_crs(prio_grid.crs)

In [11]:
# Display the point GeoDataFrame built from the corrected coordinates.
Husaybah_gdf

Unnamed: 0,longitude_full,latitude_full,geometry
64,40.986944,34.392778,POINT (40.98694 34.39278)
69,40.986944,34.392778,POINT (40.98694 34.39278)
70,40.986944,34.392778,POINT (40.98694 34.39278)
133,40.986944,34.392778,POINT (40.98694 34.39278)
350,40.986944,34.392778,POINT (40.98694 34.39278)
...,...,...,...
141193,40.986944,34.392778,POINT (40.98694 34.39278)
141194,40.986944,34.392778,POINT (40.98694 34.39278)
141195,40.986944,34.392778,POINT (40.98694 34.39278)
141196,40.986944,34.392778,POINT (40.98694 34.39278)


In [12]:
# Right spatial join: keep every point (and its index) and attach the
# attributes of the PRIO grid cell it falls in. The helper column
# 'index_left' added by sjoin is not needed afterwards.
Husaybah_gdf_updated = (
    prio_grid
    .sjoin(Husaybah_gdf, how='right')
    .drop(columns=['index_left'])
)
Husaybah_gdf_updated

Unnamed: 0,gid,xcoord,ycoord,col,row,longitude_full,latitude_full,geometry
64,179002,40.75,34.25,442,249,40.986944,34.392778,POINT (40.98694 34.39278)
69,179002,40.75,34.25,442,249,40.986944,34.392778,POINT (40.98694 34.39278)
70,179002,40.75,34.25,442,249,40.986944,34.392778,POINT (40.98694 34.39278)
133,179002,40.75,34.25,442,249,40.986944,34.392778,POINT (40.98694 34.39278)
350,179002,40.75,34.25,442,249,40.986944,34.392778,POINT (40.98694 34.39278)
...,...,...,...,...,...,...,...,...
141193,179002,40.75,34.25,442,249,40.986944,34.392778,POINT (40.98694 34.39278)
141194,179002,40.75,34.25,442,249,40.986944,34.392778,POINT (40.98694 34.39278)
141195,179002,40.75,34.25,442,249,40.986944,34.392778,POINT (40.98694 34.39278)
141196,179002,40.75,34.25,442,249,40.986944,34.392778,POINT (40.98694 34.39278)


In [13]:
# Show one joined row in full (second row by position).
Husaybah_gdf_updated.iloc[1]

gid                                    179002
xcoord                                  40.75
ycoord                                  34.25
col                                       442
row                                       249
longitude_full                      40.986944
latitude_full                       34.392778
geometry          POINT (40.986944 34.392778)
Name: 69, dtype: object

In [14]:
# One Karabilah row (second by position), for comparison with the corrected
# Husaybah location.
# TODO: check longitude_p and latitude_p at some point.
karabilah_columns = ['latitude_p', 'longitude_p', 'longitude_full', 'latitude_full', 'gid', 'xcoord', 'ycoord', 'col', 'row']
bodies_df3.loc[bodies_df3['city'] == 'Karabilah', karabilah_columns].iloc[1]

latitude_p                  NaN
longitude_p                 NaN
longitude_full        41.046437
latitude_full         34.390889
gid               179003.000000
xcoord                41.250000
ycoord                34.250000
col                  443.000000
row                  249.000000
Name: 141212, dtype: float64

In [15]:
# One Ar Rabit row (second by position), for the same comparison.
ar_rabit_columns = ['latitude_p', 'longitude_p', 'longitude_full', 'latitude_full', 'gid', 'xcoord', 'ycoord', 'col', 'row']
bodies_df3.loc[bodies_df3['city'] == 'Ar Rabit', ar_rabit_columns].iloc[1]

latitude_p                  NaN
longitude_p                 NaN
longitude_full        41.037860
latitude_full         34.427567
gid               179003.000000
xcoord                41.250000
ycoord                34.250000
col                  443.000000
row                  249.000000
Name: 141499, dtype: float64

In [16]:
# Location columns of the rows labelled 'Husaybah' before the correction is written back.
husaybah_before_columns = ['gid', 'xcoord', 'ycoord', 'col', 'row', 'longitude_full', 'latitude_full']
bodies_df3.loc[bodies_df3['city'] == 'Husaybah', husaybah_before_columns]

Unnamed: 0,gid,xcoord,ycoord,col,row,longitude_full,latitude_full
141108,177567,43.25,33.25,447,247,43.453298,33.420684
141109,177567,43.25,33.25,447,247,43.453298,33.420684
141110,177567,43.25,33.25,447,247,43.453298,33.420684
141111,177567,43.25,33.25,447,247,43.453298,33.420684
141112,177567,43.25,33.25,447,247,43.453298,33.420684
...,...,...,...,...,...,...,...
141193,177567,43.25,33.25,447,247,43.453298,33.420684
141194,177567,43.25,33.25,447,247,43.453298,33.420684
141195,177567,43.25,33.25,447,247,43.453298,33.420684
141196,177567,43.25,33.25,447,247,43.453298,33.420684


In [17]:
# Write the corrected coordinates and grid-cell attributes back into bodies_df3.
# The mask is built from the OLD coordinates, so it has to be computed before
# the rows are overwritten. Once this cell has run, those rows no longer carry
# the old coordinates and a freshly computed mask would match nothing.
Husaybah_mask = (bodies_df3['longitude_full'] == longitude_old) & (bodies_df3['latitude_full'] == latitude_old)

# Copy every column except the geometry. The geometry column is excluded by
# name instead of assuming it is the last column.
husaybah_update_columns = Husaybah_gdf_updated.columns.drop(Husaybah_gdf_updated.geometry.name)

for column in husaybah_update_columns:
    # The right-hand side is aligned to the masked rows by index label.
    bodies_df3.loc[Husaybah_mask, column] = Husaybah_gdf_updated[column]

In [18]:
# Verify the write-back: the previously masked rows should now show the new cell and coordinates.
husaybah_after_columns = ['gid', 'xcoord', 'ycoord', 'col', 'row', 'longitude_full', 'latitude_full']
bodies_df3.loc[Husaybah_mask, husaybah_after_columns]

Unnamed: 0,gid,xcoord,ycoord,col,row,longitude_full,latitude_full
64,179002,40.75,34.25,442,249,40.986944,34.392778
69,179002,40.75,34.25,442,249,40.986944,34.392778
70,179002,40.75,34.25,442,249,40.986944,34.392778
133,179002,40.75,34.25,442,249,40.986944,34.392778
350,179002,40.75,34.25,442,249,40.986944,34.392778
...,...,...,...,...,...,...,...
141193,179002,40.75,34.25,442,249,40.986944,34.392778
141194,179002,40.75,34.25,442,249,40.986944,34.392778
141195,179002,40.75,34.25,442,249,40.986944,34.392778
141196,179002,40.75,34.25,442,249,40.986944,34.392778


Done. Next, fix the Rawah coordinates and make sure Baghdad is spelled only one way.

# Rawah

In [19]:
# Distinct captions recorded for Rawah, used to identify which Rawah is meant.
rawah_captions = bodies_df1.loc[bodies_df1['city'] == 'Rawah', 'caption/abstract'].unique()
print(rawah_captions)

['In the attempt to close down a corridor used by foreign fighters along the Euphrates river, a new COP (Combat Outpost) has been set up in Rawah, in Anbar Province. According to US forces intelligence, Rawah served as a hub in shuttling foreign fighters into the country. \rLarge parts of the population fled this city of 20,000 in anticipation of the fighting, and the armored Stryker units saw some small arms fire as well as a number of different explosive devices over the first weeks. The units continually conduct searches  as well as handing out leaflets with tiplines and pictures of Abu Musab Zarqawi. '
 'Three merchants wait anxiously while their truck is searched, at a temporary checkpoint set up at the bridge entering Rawah by soldiers from the 2-14 Cavalry, 1st Brigade 25th Infantry Division. \r\rIn an attempt to close down a corridor used by foreign fighters along the Euphrates river, a new COP (Combat Outpost) has been set up in Rawah on the North side of the river, in Western

So this is the Rawah in western Anbar Province, on the Euphrates river, so the coordinates need to be fixed.

"In an attempt to close down a corridor used by foreign fighters along the Euphrates river, a new COP (Combat Outpost) has been set up in Rawah on the North side of the river, in Western Anbar Province. According to US forces intelligence, Rawah served as a hub in shuttling foreign fighters into the country."

In [20]:
# Coordinates currently stored for the rows labelled 'Rawah'.
# .item() raises unless the column holds exactly one distinct value.
rawah_rows = bodies_df3.loc[bodies_df3['city'] == 'Rawah']
latitude_old = rawah_rows['latitude_full'].unique().item()
longitude_old = rawah_rows['longitude_full'].unique().item()

# Corrected coordinates, from https://en.wikipedia.org/wiki/Rawa,_Iraq
latitude_new = 34.468611
longitude_new = 41.916667

In [21]:
# Coordinates currently stored for the rows labelled 'Rawah'.
bodies_df3.loc[bodies_df3['city'] == 'Rawah', ['longitude_full', 'latitude_full']]

Unnamed: 0,longitude_full,latitude_full
151370,43.101652,35.081981
151371,43.101652,35.081981
151372,43.101652,35.081981
151373,43.101652,35.081981
151374,43.101652,35.081981
...,...,...
151429,43.101652,35.081981
151430,43.101652,35.081981
151431,43.101652,35.081981
151432,43.101652,35.081981


In [22]:
# Select rows by their old coordinates instead of by the city label, so every
# row geocoded to the wrong point is picked up.
Rawah_mask = bodies_df3['longitude_full'].eq(longitude_old) & bodies_df3['latitude_full'].eq(latitude_old)

rawah_columns = ['longitude_full', 'latitude_full', 'gid', 'xcoord', 'ycoord', 'col', 'row']
Rawah_df = bodies_df3.loc[Rawah_mask, rawah_columns]
Rawah_df


Unnamed: 0,longitude_full,latitude_full,gid,xcoord,ycoord,col,row
46,43.101652,35.081981,180447,43.25,35.25,447,251
56,43.101652,35.081981,180447,43.25,35.25,447,251
87,43.101652,35.081981,180447,43.25,35.25,447,251
156,43.101652,35.081981,180447,43.25,35.25,447,251
172,43.101652,35.081981,180447,43.25,35.25,447,251
...,...,...,...,...,...,...,...
151429,43.101652,35.081981,180447,43.25,35.25,447,251
151430,43.101652,35.081981,180447,43.25,35.25,447,251
151431,43.101652,35.081981,180447,43.25,35.25,447,251
151432,43.101652,35.081981,180447,43.25,35.25,447,251


In [23]:
# Replace the old coordinates with the corrected ones. `assign` broadcasts the
# scalars to every row and returns a new frame, so nothing derived from
# bodies_df3 is mutated in place. The grid columns are still the old ones here;
# they are refreshed by the spatial join further down.
Rawah_df = Rawah_df.assign(longitude_full=longitude_new, latitude_full=latitude_new)
Rawah_df

Unnamed: 0,longitude_full,latitude_full,gid,xcoord,ycoord,col,row
46,41.916667,34.468611,180447,43.25,35.25,447,251
56,41.916667,34.468611,180447,43.25,35.25,447,251
87,41.916667,34.468611,180447,43.25,35.25,447,251
156,41.916667,34.468611,180447,43.25,35.25,447,251
172,41.916667,34.468611,180447,43.25,35.25,447,251
...,...,...,...,...,...,...,...
151429,41.916667,34.468611,180447,43.25,35.25,447,251
151430,41.916667,34.468611,180447,43.25,35.25,447,251
151431,41.916667,34.468611,180447,43.25,35.25,447,251
151432,41.916667,34.468611,180447,43.25,35.25,447,251


In [24]:
# Turn the corrected coordinates into point geometries that share the CRS of
# the PRIO grid, ready for a spatial join.
rawah_points = gpd.points_from_xy(Rawah_df['longitude_full'], Rawah_df['latitude_full'])
Rawah_gdf = gpd.GeoDataFrame(
    Rawah_df[['longitude_full', 'latitude_full']],
    geometry=rawah_points,
).set_crs(prio_grid.crs)
Rawah_gdf


Unnamed: 0,longitude_full,latitude_full,geometry
46,41.916667,34.468611,POINT (41.91667 34.46861)
56,41.916667,34.468611,POINT (41.91667 34.46861)
87,41.916667,34.468611,POINT (41.91667 34.46861)
156,41.916667,34.468611,POINT (41.91667 34.46861)
172,41.916667,34.468611,POINT (41.91667 34.46861)
...,...,...,...
151429,41.916667,34.468611,POINT (41.91667 34.46861)
151430,41.916667,34.468611,POINT (41.91667 34.46861)
151431,41.916667,34.468611,POINT (41.91667 34.46861)
151432,41.916667,34.468611,POINT (41.91667 34.46861)


In [25]:
# Right spatial join: keep every point (and its index) and attach the
# attributes of the PRIO grid cell it falls in. The helper column
# 'index_left' added by sjoin is not needed afterwards.
Rawah_gdf_updated = (
    prio_grid
    .sjoin(Rawah_gdf, how='right')
    .drop(columns=['index_left'])
)
Rawah_gdf_updated

Unnamed: 0,gid,xcoord,ycoord,col,row,longitude_full,latitude_full,geometry
46,179004,41.75,34.25,444,249,41.916667,34.468611,POINT (41.91667 34.46861)
56,179004,41.75,34.25,444,249,41.916667,34.468611,POINT (41.91667 34.46861)
87,179004,41.75,34.25,444,249,41.916667,34.468611,POINT (41.91667 34.46861)
156,179004,41.75,34.25,444,249,41.916667,34.468611,POINT (41.91667 34.46861)
172,179004,41.75,34.25,444,249,41.916667,34.468611,POINT (41.91667 34.46861)
...,...,...,...,...,...,...,...,...
151429,179004,41.75,34.25,444,249,41.916667,34.468611,POINT (41.91667 34.46861)
151430,179004,41.75,34.25,444,249,41.916667,34.468611,POINT (41.91667 34.46861)
151431,179004,41.75,34.25,444,249,41.916667,34.468611,POINT (41.91667 34.46861)
151432,179004,41.75,34.25,444,249,41.916667,34.468611,POINT (41.91667 34.46861)


In [26]:
# Location columns of the rows labelled 'Rawah' before the correction is written back.
rawah_before_columns = ['gid', 'xcoord', 'ycoord', 'col', 'row', 'longitude_full', 'latitude_full']
bodies_df3.loc[bodies_df3['city'] == 'Rawah', rawah_before_columns]

Unnamed: 0,gid,xcoord,ycoord,col,row,longitude_full,latitude_full
151370,180447,43.25,35.25,447,251,43.101652,35.081981
151371,180447,43.25,35.25,447,251,43.101652,35.081981
151372,180447,43.25,35.25,447,251,43.101652,35.081981
151373,180447,43.25,35.25,447,251,43.101652,35.081981
151374,180447,43.25,35.25,447,251,43.101652,35.081981
...,...,...,...,...,...,...,...
151429,180447,43.25,35.25,447,251,43.101652,35.081981
151430,180447,43.25,35.25,447,251,43.101652,35.081981
151431,180447,43.25,35.25,447,251,43.101652,35.081981
151432,180447,43.25,35.25,447,251,43.101652,35.081981


In [27]:
# Write the corrected coordinates and grid-cell attributes back into bodies_df3.
# The mask is built from the OLD coordinates, so it has to be computed before
# the rows are overwritten. Once this cell has run, those rows no longer carry
# the old coordinates and a freshly computed mask would match nothing.
Rawah_mask = (bodies_df3['longitude_full'] == longitude_old) & (bodies_df3['latitude_full'] == latitude_old)

# Copy every column except the geometry. The geometry column is excluded by
# name instead of assuming it is the last column.
rawah_update_columns = Rawah_gdf_updated.columns.drop(Rawah_gdf_updated.geometry.name)

for column in rawah_update_columns:
    # The right-hand side is aligned to the masked rows by index label.
    bodies_df3.loc[Rawah_mask, column] = Rawah_gdf_updated[column]


In [28]:
# Verify the write-back: rows labelled 'Rawah' should now show the new cell and coordinates.
rawah_after_columns = ['gid', 'xcoord', 'ycoord', 'col', 'row', 'longitude_full', 'latitude_full']
bodies_df3.loc[bodies_df3['city'] == 'Rawah', rawah_after_columns]

Unnamed: 0,gid,xcoord,ycoord,col,row,longitude_full,latitude_full
151370,179004,41.75,34.25,444,249,41.916667,34.468611
151371,179004,41.75,34.25,444,249,41.916667,34.468611
151372,179004,41.75,34.25,444,249,41.916667,34.468611
151373,179004,41.75,34.25,444,249,41.916667,34.468611
151374,179004,41.75,34.25,444,249,41.916667,34.468611
...,...,...,...,...,...,...,...
151429,179004,41.75,34.25,444,249,41.916667,34.468611
151430,179004,41.75,34.25,444,249,41.916667,34.468611
151431,179004,41.75,34.25,444,249,41.916667,34.468611
151432,179004,41.75,34.25,444,249,41.916667,34.468611


# And Baghdad name...

In [29]:
# Check whether the two spellings of Baghdad differ in anything but the name:
# print the distinct values of each location column across both spellings.
baghdad_rows = bodies_df3[bodies_df3['city'].isin(['Bagdad', 'Baghdad'])]

for column in ['gid', 'xcoord', 'ycoord', 'col', 'row', 'longitude_full', 'latitude_full']:
    print(column)
    print(baghdad_rows[column].unique())

# Each column has a single value, so only the name differs.

gid
[177569]
xcoord
[44.25]
ycoord
[33.25]
col
[449]
row
[247]
longitude_full
[44.3787992]
latitude_full
[33.3024309]


In [30]:
# Merge the 'Bagdad' spelling into 'Baghdad'.
bagdad_mask = bodies_df3['city'].eq('Bagdad')
bodies_df3.loc[bagdad_mask, 'city'] = 'Baghdad'

In [31]:
# List the distinct city labels after the rename.
# NOTE(review): the output still shows what look like spelling variants
# ('Baquba'/'Baqouba', 'Fallujah'/'Falluja', 'Cizre'/'cizre') — confirm whether
# these should be merged too.
bodies_df3['city'].unique()

array(['nan', 'Baghdad', 'Basra', 'Husaybah', 'Baquba', 'Karabilah',
       'Mosul', 'Hilla', 'Samarra', 'Dujail', 'Ar Rabit', 'Fallujah',
       'Kerbala', 'Al-Awja', 'Abu Ghraib', 'Kirkuk', 'Al Qaim', 'Taji',
       'Balad', 'Al Karamah Border Crossing', 'Baqouba', 'Ramadi',
       'Kara-Ula', 'Zaxo', 'Najaf', 'Cizre', 'Sirnak', 'Falluja', 'cizre',
       'Habur Port', 'Ad Dawr', 'Al-Samawa', 'Al Asad', 'Habbaniyah',
       'Al Taqaddum', 'Rawah'], dtype=object)

In [None]:
# TODO: update the UCDP and PRIO data for Rawah and Husaybah.
# Also, if you infer the city for all rows you get more locations for fixed effects.

In [32]:
# Persist the corrected frame as v1_4 (pickle and CSV), plus a reduced-column
# copy. Paths are built with os.path.join so the trailing slash on data_dir
# does not produce '//' in the file names.
data_dir = '/home/simon/Documents/Bodies/data/done_dfs/'
bodies_df3.to_pickle(os.path.join(data_dir, 'bodies_df_2022_v1_4.pkl'))
bodies_df3.to_csv(os.path.join(data_dir, 'bodies_df_2022_v1_4.csv'))

# Columns kept in the reduced export.
# NOTE(review): 'falgIRQ_mean' looks like a misspelling of 'flagIRQ_mean', but
# it must match the column name actually present in bodies_df3 — confirm
# before renaming.
subset = ['img_id', 'person_mean', 'male_mean', 'falgIRQ_mean', 'female_mean', 'religiousGarmentFemale_mean', 'uniformed_mean',
          'firearm_mean', 'flagUS_mean', 'militaryVehicle_mean','publication', 'year', 'month', 'distance_days', 'longitude_full', 'latitude_full', 
          'month_id', 'gid', 'xcoord', 'ycoord','gwno', 'deaths_a', 'deaths_b', 'deaths_civilians', 'deaths_unknown', 'best', 'high', 'low', 'log_best', 
          'log_low', 'log_high','agri_ih', 'barren_ih', 'bdist3', 'capdist', 'excluded', 'gcp_mer', 'gcp_ppp', 'irrig_sum', 'nlights_calib_mean', 
          'nlights_mean', 'pasture_ih', 'petroleum_y', 'pop_gpw_sd', 'pop_gpw_sum', 'prec_gpcp', 'urban_ih', 'cmr_mean', 'imr_mean', 'mountains_mean', 
          'ttime_mean', 'tce', 'tsce', 'emotions_t2_mean', 'all_formal_mean', 'all_privat_mean', 'all_urban_mean', 'all_militarized_mean', 
          'damaged_property_mean', 'all_public_mean', 'emotions_t1_mean', 'mass_protest_mean', 'all_rural_mean']

bodies_df3_sub = bodies_df3[subset]

bodies_df3_sub.to_pickle(os.path.join(data_dir, 'bodies_df_2022_v1_4_JACOB.pkl'))
bodies_df3_sub.to_csv(os.path.join(data_dir, 'bodies_df_2022_v1_4_JACOB.csv'))