In [1]:
import pandas as pd
import datetime as dt

# Suicide Data

In [2]:
suicide_data = pd.read_csv('suicide_data.csv') # suicide deaths per 100k

In [3]:
suicide_data.shape

(10980, 34)

In [4]:
df = pd.DataFrame(suicide_data)

In [5]:
df.head()

Unnamed: 0,IndicatorCode,Indicator,ValueType,ParentLocationCode,ParentLocation,Location type,SpatialDimValueCode,Location,Period type,Period,...,FactValueUoM,FactValueNumericLowPrefix,FactValueNumericLow,FactValueNumericHighPrefix,FactValueNumericHigh,Value,FactValueTranslationID,FactComments,Language,DateModified
0,MH_12,Age-standardized suicide rates (per 100 000 po...,numeric,AMR,Americas,Country,ATG,Antigua and Barbuda,Year,2019,...,,,0.0,,0.0,0 [0 – 0],,,EN,2021-02-09T06:00:00.000Z
1,MH_12,Age-standardized suicide rates (per 100 000 po...,numeric,AMR,Americas,Country,BRB,Barbados,Year,2019,...,,,0.11,,0.22,0.16 [0.11 – 0.22],,,EN,2021-02-09T06:00:00.000Z
2,MH_12,Age-standardized suicide rates (per 100 000 po...,numeric,AMR,Americas,Country,BRB,Barbados,Year,2019,...,,,0.22,,0.42,0.31 [0.22 – 0.42],,,EN,2021-02-09T06:00:00.000Z
3,MH_12,Age-standardized suicide rates (per 100 000 po...,numeric,AMR,Americas,Country,ATG,Antigua and Barbuda,Year,2019,...,,,0.22,,0.45,0.32 [0.22 – 0.45],,,EN,2021-02-09T06:00:00.000Z
4,MH_12,Age-standardized suicide rates (per 100 000 po...,numeric,AMR,Americas,Country,BRB,Barbados,Year,2019,...,,,0.34,,0.65,0.49 [0.34 – 0.65],,,EN,2021-02-09T06:00:00.000Z


## Data Cleaning

In [6]:
column_names = list(df.columns)

# print unique values for each column to get a sense of what they hold
for x in column_names:
    print(f"{x}   {df[x].unique()}")

IndicatorCode   ['MH_12']
Indicator   ['Age-standardized suicide rates (per 100 000 population)']
ValueType   ['numeric']
ParentLocationCode   ['AMR' 'EMR' 'WPR' 'SEAR' 'EUR' 'AFR']
ParentLocation   ['Americas' 'Eastern Mediterranean' 'Western Pacific' 'South-East Asia'
 'Europe' 'Africa']
Location type   ['Country']
SpatialDimValueCode   ['ATG' 'BRB' 'GRD' 'VCT' 'VEN' 'KWT' 'SYR' 'BRN' 'MDV' 'JOR' 'HND' 'PAN'
 'JAM' 'ARM' 'CYP' 'MMR' 'OMN' 'STP' 'IDN' 'TUR' 'BHS' 'PHL' 'SYC' 'PER'
 'GRC' 'LCA' 'AZE' 'BGD' 'LBN' 'QAT' 'COL' 'GHA' 'BLZ' 'TUN' 'DOM' 'DZA'
 'SAU' 'NIC' 'PNG' 'CRI' 'MAR' 'NGA' 'NER' 'CUB' 'NZL' 'GTM' 'BRA' 'CAN'
 'SSD' 'AUT' 'GBR' 'UGA' 'MLI' 'VNM' 'BGR' 'MWI' 'PRK' 'MKD' 'SEN' 'KEN'
 'HRV' 'DNK' 'IND' 'SLV' 'ISL' 'HTI' 'SLE' 'AUS' 'COM' 'PRT' 'COG' 'HUN'
 'SUR' 'LUX' 'UZB' 'ECU' 'DJI' 'EST' 'BDI' 'MDA' 'SRB' 'JPN' 'GIN' 'SWE'
 'GNB' 'COD' 'NLD' 'AGO' 'ROU' 'SGP' 'BEN' 'DEU' 'LKA' 'FJI' 'TTO' 'GAB'
 'FSM' 'TCD' 'GMB' 'MDG' 'CHL' 'NOR' 'FIN' 'KOR' 'ZWE' 'ARG' 'BIH' 'KGZ'
 '

In [7]:
# Drop rows with gender-specific data
drop_index = []

for row in range(df.shape[0]):
    gender = df['Dim1'][row]
    
    if gender != 'Both sexes':
        drop_index.append(row) 
    
df.drop(drop_index, inplace = True)
df.reset_index(drop=True, inplace = True)

In [8]:
# Create new DF with useful columns
suicide_df = df[['Location',
         'Period',
         'FactValueNumeric',
         'FactValueNumericLow',
         'FactValueNumericHigh']]

In [9]:
# Change period to year to match earthquake_df since it makes more sense
suicide_df.rename(columns = {"Period": "year"}, inplace = True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  suicide_df.rename(columns = {"Period": "year"}, inplace = True)


In [10]:
suicide_df.head()

Unnamed: 0,Location,year,FactValueNumeric,FactValueNumericLow,FactValueNumericHigh
0,Barbados,2019,0.31,0.22,0.42
1,Antigua and Barbuda,2019,0.32,0.22,0.45
2,Grenada,2019,0.64,0.45,0.88
3,Saint Vincent and the Grenadines,2019,1.01,0.7,1.41
4,Jordan,2019,1.98,1.29,2.9


# Earthquake Data

In [11]:
earthquake_data = pd.read_csv('Earthquakes/earthquake_data_all_years.csv')
earthquake_df = pd.DataFrame(earthquake_data)

In [12]:
earthquake_df.head()

Unnamed: 0,time,place,mag,type,geometry
0,978216704470,"50 km SW of Ashk?sham, Afghanistan",4.6,earthquake,"{'type': 'Point', 'coordinates': [71.126, 36.3..."
1,978216085570,"124 km S of Kokopo, Papua New Guinea",4.1,earthquake,"{'type': 'Point', 'coordinates': [152.303, -5...."
2,978208040890,"118 km S of Kokopo, Papua New Guinea",4.3,earthquake,"{'type': 'Point', 'coordinates': [152.377, -5...."
3,978206160040,"297 km SSE of Alo, Wallis and Futuna",4.0,earthquake,"{'type': 'Point', 'coordinates': [-177.261, -1..."
4,978205372710,"16 km S of Yonakuni, Japan",5.1,earthquake,"{'type': 'Point', 'coordinates': [122.974, 24...."


## Cleaning Data

In [13]:
earthquake_df.columns

Index(['time', 'place', 'mag', 'type', 'geometry'], dtype='object')

In [14]:
# Dropping columns with no location data
earthquake_df.dropna(subset = 'place', inplace = True)

# Dropping useless columns
earthquake_df.drop(columns = ['type', 'geometry'], inplace = True)
earthquake_df.reset_index(drop=True, inplace = True)

In [15]:
# Converting from UNIX to datetime and assigning year to a new column
years = []

for row in range(earthquake_df.shape[0]):
    date = earthquake_df['time'][row]
    
    # number of digits is important to get the right year (2000 vs 2279)
    if date > 1000000000000:
        date = str(date)[:10]
    else:
        date = str(date)[:9]
    
    year = dt.date.fromtimestamp(int(date)).year
    years.append(year)

earthquake_df['year'] = years

In [16]:
earthquake_df.head()

Unnamed: 0,time,place,mag,year
0,978216704470,"50 km SW of Ashk?sham, Afghanistan",4.6,2000
1,978216085570,"124 km S of Kokopo, Papua New Guinea",4.1,2000
2,978208040890,"118 km S of Kokopo, Papua New Guinea",4.3,2000
3,978206160040,"297 km SSE of Alo, Wallis and Futuna",4.0,2000
4,978205372710,"16 km S of Yonakuni, Japan",5.1,2000


In [17]:
locations = []

for row in range(earthquake_df.shape[0]):
    place = earthquake_df['place'][row]
    
    # location is what's after the comma (ex: Texas in Dallas, Texas)
    location = str(place).split(",")[-1]
    location = location.strip()

    # Removing the word 'region' from end of entries
    try:
        region_index = location.index('region')
        location = location[0: region_index].strip()
    except: ValueError

    locations.append(location)

In [18]:
# Getting a list of unique locations to check if the should be replaced to better match suicide data
unique_locations = []
for x in locations:
    if not(x in unique_locations):
        unique_locations.append(x)

In [19]:
suicide_countries = suicide_df['Location'].unique()

In [20]:
# Printing off locations that don't match suicide data
for x in locations:
    if not(x in suicide_countries):
        print(x)

Wallis and Futuna
Guam
Martinique
Russia
Banda Sea
Taiwan
south of the Fiji Islands
Taiwan
Oregon
California
Iran
Northern Mariana Islands
central Mediterranean Sea
Taiwan
Russia
Timor Leste
northwest of the Kuril Islands
Northern Mariana Islands
California
Mariana Islands
CA
Russia
south of Tonga
off the east coast of the North Island of New Zealand
Iran
New Caledonia
Alaska
south of Tonga
Alaska
Alaska
Iran
Norwegian Sea
south of Australia
south of Australia
south of Australia
Norwegian Sea
south of the Fiji Islands
Mariana Islands
south of the Fiji Islands
central East Pacific Rise
Kermadec Islands
Turkey
Micronesia
Svalbard and Jan Mayen
CA
southern Mid-Atlantic Ridge
Kermadec Islands
southeast of the Loyalty Islands
Iran
Alaska
Puerto Rico
West Chile Rise
Alaska
Northern Mariana Islands
Taiwan
Alaska
Russia
Iran
Alaska
United Kingdom
Guam
Reykjanes Ridge
Iran
Reykjanes Ridge
Reykjanes Ridge
Reykjanes Ridge
Reykjanes Ridge
Russia
Russia
California
California
California
California
R

Timor Leste
Alaska
Banda Sea
Wallis and Futuna
northern Mid-Atlantic Ridge
Mid-Indian Ridge
central Mid-Atlantic Ridge
southeast Indian Ridge
Guam
Southwest Indian Ridge
Southwest Indian Ridge
Kuril Islands
West Chile Rise
Banda Sea
Vietnam
Alaska
Adriatic Sea
Tanzania
Taiwan
Chagos Archipelago
Russia
south of the Fiji Islands
Taiwan
south of the Kermadec Islands
Russia
Alaska
south of the Fiji Islands
Turkey
north of Svalbard
Alaska
Northern Mariana Islands
Guam
Pacific-Antarctic Ridge
northern Mid-Atlantic Ridge
Alaska
Alaska
northern Mid-Atlantic Ridge
central East Pacific Rise
south of the Fiji Islands
Timor Leste
Russia
Alaska
Taiwan
Kuril Islands
Turkey
Guam
Alaska
Timor Leste
Russia
south of the Fiji Islands
Taiwan
Russia
Russia
Russia
Russia
Iran
northwest of the Kuril Islands
Kuril Islands
Alaska
Russia
Russia
Russia
south of the Fiji Islands
Wallis and Futuna
New Caledonia
south of the Fiji Islands
Russia
Adriatic Sea
Alaska
Alaska
Taiwan
Banda Sea
U.S. Virgin Islands
U.S. Vi

Iran
South Sandwich Islands
Mariana Islands
southern East Pacific Rise
South Sandwich Islands
South Sandwich Islands
Russia
Northern Mariana Islands
Alaska
south of the Fiji Islands
U.S. Virgin Islands
south of the Fiji Islands
Bolivia
northern Peru
northern Mid-Atlantic Ridge
Reykjanes Ridge
Afghanistan-Tajikistan border
northern Mid-Atlantic Ridge
Palau
Anguilla
Russia
U.S. Virgin Islands
Anguilla
U.S. Virgin Islands
Russia
Anguilla
Anguilla
Timor Leste
Anguilla
U.S. Virgin Islands
central Mid-Atlantic Ridge
Alaska
western Xizang
Kermadec Islands
southern Mid-Atlantic Ridge
Alaska
Turkey
east of the North Island of New Zealand
western Xizang
Timor Leste
Russia
Anguilla
U.S. Virgin Islands
western Xizang
western Xizang
CA
Kermadec Islands
Anguilla
California
U.S. Virgin Islands
Anguilla
Russia
Chile-Bolivia border
Persian Gulf
Alaska
Pacific-Antarctic Ridge
Anguilla
Anguilla
U.S. Virgin Islands
Anguilla
U.S. Virgin Islands
Anguilla
Anguilla
U.S. Virgin Islands
U.S. Virgin Islands
U.S.

Northern Mariana Islands
Northern Mariana Islands
Northern Mariana Islands
Northern Mariana Islands
Northern Mariana Islands
Northern Mariana Islands
Northern Mariana Islands
Northern Mariana Islands
Northern Mariana Islands
Northern Mariana Islands
Northern Mariana Islands
Northern Mariana Islands
Northern Mariana Islands
Northern Mariana Islands
Northern Mariana Islands
Northern Mariana Islands
Northern Mariana Islands
Northern Mariana Islands
Northern Mariana Islands
Northern Mariana Islands
Northern Mariana Islands
Alaska
Northern Mariana Islands
Northern Mariana Islands
Kermadec Islands
southern Mid-Atlantic Ridge
Taiwan
Banda Sea
southern Mid-Atlantic Ridge
New Caledonia
Balleny Islands
California
south of the Kermadec Islands
Alaska
Turkey
south of the Fiji Islands
off the coast of Oregon
off the coast of Oregon
Russia
off the coast of Oregon
western Xizang
Svalbard and Jan Mayen
U.S. Virgin Islands
Turkey
South Sandwich Islands
New Caledonia
New Caledonia
New Caledonia
Russia
n

South Sandwich Islands
off the east coast of the North Island of New Zealand
east of the Kuril Islands
Mid-Indian Ridge
Alaska
southern Mid-Atlantic Ridge
Russia
Mid-Indian Ridge
Russia
South Indian Ocean
Russia
north of Ascension Island
Russia
Kuril Islands
Russia
Alaska
off the coast of Oregon
off the coast of Oregon
Taiwan
off the coast of Oregon
South Sandwich Islands
Russia
Guam
off the coast of Oregon
Kuril Islands
off the coast of Oregon
Kermadec Islands
off the coast of Oregon
off the coast of Oregon
Greenland Sea
Russia
Russia
Russia
South Sandwich Islands
South Sandwich Islands
South Sandwich Islands
California
Russia
Russia
Russia
Turkey
Svalbard and Jan Mayen
Alaska
southeast of the Loyalty Islands
Iran
Kermadec Islands
central East Pacific Rise
Russia
Iran
Iran
Russia
Pacific-Antarctic Ridge
Russia
Russia
south of the Kermadec Islands
south of the Fiji Islands
Russia
Russia
Greenland Sea
Russia
Iran
Russia
Russia
Russia
north of Severnaya Zemlya
Mid-Indian Ridge
Banda Sea


Northern Mariana Islands
Timor Leste
Alaska
Indian Ocean Triple Junction
Alaska
Mid-Indian Ridge
Russia
South Sandwich Islands
South Sandwich Islands
South Sandwich Islands
South Sandwich Islands
Beaufort Sea
South Sandwich Islands
Azores-Cape St. Vincent Ridge
Russia
south of the Kermadec Islands
south of the Fiji Islands
Alaska
south of the Fiji Islands
Oklahoma
South Sandwich Islands
Alaska
Russia
Southwest Indian Ridge
Alaska
Russia
California
Owen Fracture Zone
south of the Kermadec Islands
Timor Leste
Taiwan
French Southern Territories
south of the Fiji Islands
Iran
central Mid-Atlantic Ridge
Banda Sea
Taiwan
Banda Sea
Timor Leste
Alaska
southern East Pacific Rise
northern Mid-Atlantic Ridge
Mariana Islands
South Sandwich Islands
western Indian-Antarctic Ridge
south of the Fiji Islands
Kermadec Islands
Russia
South Sandwich Islands
Banda Sea
Taiwan
Cayman Islands
Alaska
California
Timor Leste
South Sandwich Islands
Iran
Russia
south of the Kermadec Islands
South Sandwich Islands


southeast Indian Ridge
Chile-Argentina border
north of Severnaya Zemlya
north of Severnaya Zemlya
southern East Pacific Rise
Kermadec Islands
south of the Fiji Islands
south of the Fiji Islands
Russia
South Sandwich Islands
Venezuela
Northern Mariana Islands
south of the Fiji Islands
west of Macquarie Island
south of the Fiji Islands
Bay of Bengal
Colorado
South Sandwich Islands
Oklahoma
Banda Sea
Russia
Utah
off the east coast of the North Island of New Zealand
South Sandwich Islands
Russia
Mariana Islands
off the coast of Ecuador
Timor Leste
Russia
Turkey
Turkey
off the coast of Ecuador
Guam
Martinique
Alaska
southeast of Easter Island
Russia
Russia
Taiwan
Russia
Iran
Russia
Russia
south of the Fiji Islands
Turkey
Russia
off the coast of Ecuador
south of the Fiji Islands
Russia
Russia
Bolivia
Kuril Islands
South Sandwich Islands
South Sandwich Islands
south of Tonga
Alaska
Timor Leste
Alaska
South Sandwich Islands
Kashmir-India border
Alaska
south of Africa
Micronesia
Russia
Reykjane

Alaska
south of the Fiji Islands
Northern Mariana Islands
Ascension Island
Kermadec Islands
Puerto Rico
Guam
Timor Leste
South Sandwich Islands
south of Tonga
south of Tonga
western Xizang
Easter Island
eastern Turkey
Puerto Rico
Puerto Rico
Kermadec Islands
Northern Mariana Islands
Russia
Russia
Alaska
Russia
Taiwan
Kermadec Islands
U.S. Virgin Islands
Alaska
Mid-Indian Ridge
Mid-Indian Ridge
Puerto Rico
Puerto Rico
Puerto Rico
Puerto Rico
Puerto Rico
Puerto Rico
Idaho
Turkey
Alaska
South Shetland Islands
South Indian Ocean
south of the Fiji Islands
Guam
Iran
south of the Fiji Islands
Russia
south of the Fiji Islands
off the west coast of northern Sumatra
Puerto Rico
Northern Mariana Islands
North Indian Ocean
Kermadec Islands
Russia
U.S. Virgin Islands
south of the Fiji Islands
Russia
Turkey
Alaska
Turkey
Guam
Mid-Indian Ridge
Bolivia
south of the Fiji Islands
Puerto Rico
South Sandwich Islands
Mid-Indian Ridge
New Caledonia
Guam
CA
Mid-Indian Ridge
Mid-Indian Ridge
Peru-Ecuador bord

In [21]:
# Dealing with weird locations
us_states = ['Alabama', 'Alaska', 'Arizona', 'Arkansas','CA', 'California', 'Colorado', 'Connecticut', 'Washington DC', 'Delaware', 'Florida', 'Georgia', 'Hawaii', 'Idaho', 'Illinois', 'Indiana', 'Iowa', 'Kansas', 'Kentucky', 'Louisiana', 'Maine', 'Maryland', 'Massachusetts', 'Michigan', 'Minnesota', 'Mississippi', 'Missouri', 'Montana', 'Nebraska', 'Nevada', 'New Hampshire', 'New Jersey', 'New Mexico', 'New York', 'Puerto Rico' 'North Carolina', 'North Dakota', 'Ohio', 'Oklahoma', 'Oregon', 'Pennsylvania', 'Rhode Island', 'South Carolina', 'South Dakota', 'Tennessee', 'Texas', 'Utah', 'Vermont', 'Virginia', 'Washington', 'West Virginia', 'Wisconsin', 'Wyoming', 'Guam','AK','United States']

# Should probably put this in a separate file or something
location_fixer = {"Afghanistan":"Afghanistan","Taiwan":"Taiwan","Samoa":"Samoa","Sumatra":"Sumatra","Bay of Bengal":"India","Italy":"Italy","Venezuela": "Venezuela (Bolivarian Republic of)", "Syria": "Syrian Arab Republic", "Honduras": "Honduras", "Panama": "Panama", 'Russia':'Russian Federation', 'Fiji':'Fiji', "Iran": 'Iran (Islamic Republic of)', "Timor Leste":"Timor_Leste", "Tonga":"Tonga", "New Zealand":"New Zealand", "Norwegian":"Norway", "Australia": "Australia", "Turkey":'Türkiye','Micronesia':'Micronesia (Federated States of)',"Chile":"Chile", "United Kingdom":'United Kingdom of Great Britain and Northern Ireland', "Bolivia":'Bolivia (Plurinational State of)', "Indian ":"India","Bangaldesh":"Bangladesh","Mozambique":"Mozambique", "South Korea": 'Republic of Korea', "Svalbard":"Norway", "Greece":"Greece", "Xizang":"Tibet", "Algeria":"Algeria","Mauritius":"Mauritius","Tanzania":'United Republic of Tanzania', "Colombia":"Colombia","Armenia":"Armenia","Azerbaijan":"Azerbaijan","Mexico":"Mexico","Ecuador":"Ecuador","Chile":"Chile","Argentina":"Argentina","Congo":"Congo","Albania":"Albania","Honduras":"Honduras","Vietnam":"Viet Nam","Kazakhstan":"Kazakhstan", "Austria":"Austria","Slovenia":"Slovenia","Nicaragua":"Nicaragua","Mongolia":"Mongolia","Peru":"Peru","Macedonia":"Macedonia","Japan":"Japan", "Prince Edward Islands":"Canada", "Chile":"Chile", "North Korea":"Democratic People's Republic of Korea", "Iran": 'Iran (Islamic Republic of)', "Pakistan":"Pakistan","Tajikstan":"Tajikstan"}

# Loop through dict of locations to change
for k,v in location_fixer.items():
    
    # loop through locations list to compare to locations to change
    for x in range(len(locations)):
        location = locations[x]
        
        # if location is one to change, change it
        if k in location:
            locations[x] = v

# Change US states to 'United States of America'
for x in range(len(locations)):
    
    if locations[x] in us_states:
        locations[x] = 'United States of America'


In [22]:
# Assign locations list as new column
earthquake_df['Location'] = locations

In [23]:
# Drop rows if there's no suicide data for that location as cannot compare data if it does not exist
drop_index = []

for row in range(earthquake_df.shape[0]):
    location = earthquake_df['Location'][row]
    
    if not(location in suicide_countries):
            drop_index.append(row)
            
earthquake_df.drop(drop_index, inplace = True)
earthquake_df.reset_index(drop=True, inplace = True)

In [24]:
earthquake_df.head()

Unnamed: 0,time,place,mag,year,Location
0,978216704470,"50 km SW of Ashk?sham, Afghanistan",4.6,2000,Afghanistan
1,978216085570,"124 km S of Kokopo, Papua New Guinea",4.1,2000,Papua New Guinea
2,978208040890,"118 km S of Kokopo, Papua New Guinea",4.3,2000,Papua New Guinea
3,978205372710,"16 km S of Yonakuni, Japan",5.1,2000,Japan
4,978201164780,"156 km S of Merizo Village, Guam",4.4,2000,United States of America


In [25]:
earthquake_df.drop(columns = ['time', 'place'], inplace = True)

In [26]:
earthquake_df.head()

Unnamed: 0,mag,year,Location
0,4.6,2000,Afghanistan
1,4.1,2000,Papua New Guinea
2,4.3,2000,Papua New Guinea
3,5.1,2000,Japan
4,4.4,2000,United States of America


# Combining both

In [27]:
earthquake_suicide_df = earthquake_df.merge(right =  suicide_df, how = 'outer', on = ['Location', 'year'])

In [28]:
# Checking that merge was successful
earthquake_suicide_df[earthquake_suicide_df["Location"] == 'Japan']

Unnamed: 0,mag,year,Location,FactValueNumeric,FactValueNumericLow,FactValueNumericHigh
1420,5.1,2000,Japan,18.08,17.45,18.61
1421,3.8,2000,Japan,18.08,17.45,18.61
1422,4.6,2000,Japan,18.08,17.45,18.61
1423,4.0,2000,Japan,18.08,17.45,18.61
1424,3.9,2000,Japan,18.08,17.45,18.61
...,...,...,...,...,...,...
284067,4.3,2022,Japan,,,
284068,4.4,2022,Japan,,,
284069,4.8,2022,Japan,,,
284070,4.6,2022,Japan,,,


# Cleaning new DF

In [29]:
earthquake_countries = earthquake_df['Location'].unique()

# Dropping countries that do not have earthquakes
drop_index = []

for x in range(earthquake_suicide_df.shape[0]):
    if not(earthquake_suicide_df['Location'][x] in earthquake_countries):
        drop_index.append(x)
        
earthquake_suicide_df.drop(drop_index, inplace = True)
earthquake_suicide_df.reset_index(inplace = True, drop = True)

In [30]:
earthquake_suicide_df.head()

Unnamed: 0,mag,year,Location,FactValueNumeric,FactValueNumericLow,FactValueNumericHigh
0,4.6,2000,Afghanistan,7.71,4.44,12.48
1,4.0,2000,Afghanistan,7.71,4.44,12.48
2,4.2,2000,Afghanistan,7.71,4.44,12.48
3,4.2,2000,Afghanistan,7.71,4.44,12.48
4,3.6,2000,Afghanistan,7.71,4.44,12.48


In [31]:
lists = [1,2,3,4]
print(max(lists))

4


In [32]:
drop_index = []

year = earthquake_suicide_df['year'][0]
max_mag = earthquake_suicide_df['mag'][0]
country = earthquake_suicide_df['Location'][0]
# max_index = 0

for x in range(earthquake_suicide_df.shape[0]):
    mag = earthquake_suicide_df['mag'][x]
    row_country = earthquake_suicide_df['Location'][x]
    row_year = earthquake_suicide_df['year'][x]
    
    if (row_country == country) & (row_year == year):
        
        if max_mag < mag:
            try: 
                drop_index.append(max_index)
            except: NameError
            max_index = x
            max_mag = mag
        else:
            try: 
                drop_index.append(x)
            except: NameError
            
    
    else:
        year = row_year
        country = row_country
        max_mag = mag

earthquake_suicide_df.drop(drop_index, inplace = True)
earthquake_suicide_df.reset_index(inplace = True, drop = True)

In [33]:
earthquake_suicide_df.head()

Unnamed: 0,mag,year,Location,FactValueNumeric,FactValueNumericLow,FactValueNumericHigh
0,4.1,2000,Papua New Guinea,2.83,1.59,5.06
1,5.1,2000,Japan,18.08,17.45,18.61
2,4.4,2000,United States of America,10.02,9.65,15.03
3,3.9,2000,Fiji,11.69,8.01,16.45
4,4.4,2000,Yemen,8.51,4.12,14.75


In [35]:
earthquake_suicide_df.to_csv("earthquake_suicide_df.csv", index = False)

# Creating json

In [46]:
earthquake_suicide_df.set_index(['Location','year'], inplace = True)

KeyError: "None of ['Location'] are in the columns"

In [42]:
earthquake_suicide = earthquake_suicide_df.T

In [45]:
earthquake_suicide.head()

Location,Papua New Guinea,Japan,United States of America,Fiji,Yemen,Mexico,Turkmenistan,Chile,Russian Federation,Vanuatu,...,Mauritania,Saint Vincent and the Grenadines,Israel,Viet Nam,South Sudan,United Arab Emirates,Netherlands,Niger,Guinea,Ghana
mag,4.1,5.1,4.4,3.9,4.4,4.4,4.9,4.3,4.8,5.5,...,,,,,,,,,,
year,2000.0,2000.0,2000.0,2000.0,2000.0,2000.0,2000.0,2000.0,2000.0,2000.0,...,2000.0,2000.0,2000.0,2000.0,2000.0,2000.0,2000.0,2000.0,2000.0,2000.0
FactValueNumeric,2.83,18.08,10.02,11.69,8.51,3.86,13.85,10.52,48.89,23.22,...,6.39,6.45,6.76,7.17,7.89,8.01,8.09,9.51,9.73,9.75
FactValueNumericLow,1.59,17.45,9.65,8.01,4.12,3.66,11.64,9.15,47.51,12.57,...,3.4,4.82,5.75,4.07,4.35,4.71,7.1,5.15,5.57,6.46
FactValueNumericHigh,5.06,18.61,15.03,16.45,14.75,10.96,16.34,12.02,50.18,37.01,...,11.04,8.34,7.9,10.87,13.28,12.33,9.18,16.28,15.96,14.3


In [44]:
earthquake_suicide.to_json("earthquake_suicide_df.json")

ValueError: DataFrame columns must be unique for orient='columns'.