In [1]:
# Depenencies
import pandas as pd
from sqlalchemy import create_engine, inspect, func
import csv, os
from sqlalchemy.ext.automap import automap_base
from sqlalchemy.orm import Session
from flask import jsonify

In [3]:
# Importing csv files
ufo_path = "static/Resources/ufo_reports_raw.CSV"
ufo_data = pd.read_csv(ufo_path)

# Transformation Phase (Cleaning)

### USA UFO Sightings

In [4]:
ufo_data.head()

Unnamed: 0,summary,city,state,date_time,shape,duration,stats,report_link,text,posted,city_latitude,city_longitude
0,freaked me out,San Jose,CA,2021-05-06T21:00:00,sphere,2 minutes,Occurred : 5/6/2021 21:00 (Entered as : 05/06...,http://www.nuforc.org/webreports/163/S163041.html,freaked me out I looked up into the night sky ...,2021-05-20T00:00:00,37.338842,-121.889706
1,"One object observed at closer ground distance,...",Somerset,KY,,unknown,2400 hours,Occurred : Reported: 12/21/2019 8:49:14 PM 2...,http://www.nuforc.org/webreports/151/S151813.html,"One object observed at closer ground distance,...",,37.090361,-84.498169
2,UFO contact we made during Desert Shield in th...,,,,,,Occurred : Reported: 5/16/2021 4:55:36 PM 16...,http://www.nuforc.org/webreports/163/S163175.html,UFO contact we made during Desert Shield in th...,,,
3,"Over a course of 5 years, I have seen UFOs, ob...",Germantown,MD,2005-07-06T12:00:00,formation,Years,Occurred : 7/6/1905 12:00 (Entered as : 2014 ...,http://www.nuforc.org/webreports/162/S162867.html,"Over a course of 5 years, I have seen UFOs, ob...",2021-05-20T00:00:00,39.154986,-77.272538
4,"Driving north on US 87 just outside Brady, Tex...",Brady,TX,2020-01-29T19:45:00,light,Lights disappear,Occurred : 1/29/2020 19:45 (Entered as : 01/2...,http://www.nuforc.org/webreports/152/S152953.html,"Driving north on US 87 just outside Brady, Tex...",2020-01-31T00:00:00,31.1451,-99.3478


In [5]:
# Split date_time column
ufo_data[["date","time"]] = ufo_data['date_time'].str.split("T",expand=True)

# Convert date and time columns data type
ufo_data['date'] = pd.to_datetime(ufo_data['date'])
ufo_data['time'] = pd.to_datetime(ufo_data['time'])

In [6]:
ufo_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 98353 entries, 0 to 98352
Data columns (total 14 columns):
 #   Column          Non-Null Count  Dtype         
---  ------          --------------  -----         
 0   summary         98305 non-null  object        
 1   city            98052 non-null  object        
 2   state           92707 non-null  object        
 3   date_time       97027 non-null  object        
 4   shape           94682 non-null  object        
 5   duration        93952 non-null  object        
 6   stats           98316 non-null  object        
 7   report_link     98353 non-null  object        
 8   text            98280 non-null  object        
 9   posted          97027 non-null  object        
 10  city_latitude   80483 non-null  float64       
 11  city_longitude  80483 non-null  float64       
 12  date            97027 non-null  datetime64[ns]
 13  time            97027 non-null  datetime64[ns]
dtypes: datetime64[ns](2), float64(2), object(10)
memory us

In [7]:
#ufo_data.columns

to_drop = ['posted',
            'stats',
            'report_link',
            'date_time',
            'text'
            ]

ufo_data.drop(to_drop, axis=1, inplace=True)

ufo_data.head()

Unnamed: 0,summary,city,state,shape,duration,city_latitude,city_longitude,date,time
0,freaked me out,San Jose,CA,sphere,2 minutes,37.338842,-121.889706,2021-05-06,2021-12-13 21:00:00
1,"One object observed at closer ground distance,...",Somerset,KY,unknown,2400 hours,37.090361,-84.498169,NaT,NaT
2,UFO contact we made during Desert Shield in th...,,,,,,,NaT,NaT
3,"Over a course of 5 years, I have seen UFOs, ob...",Germantown,MD,formation,Years,39.154986,-77.272538,2005-07-06,2021-12-13 12:00:00
4,"Driving north on US 87 just outside Brady, Tex...",Brady,TX,light,Lights disappear,31.1451,-99.3478,2020-01-29,2021-12-13 19:45:00


In [8]:
# Drop UFO Sightings with no city and date
ufo_df = ufo_data.dropna(subset=['city', 'date', 'time'])

ufo_df.info()


<class 'pandas.core.frame.DataFrame'>
Int64Index: 96774 entries, 0 to 98352
Data columns (total 9 columns):
 #   Column          Non-Null Count  Dtype         
---  ------          --------------  -----         
 0   summary         96730 non-null  object        
 1   city            96774 non-null  object        
 2   state           91394 non-null  object        
 3   shape           93309 non-null  object        
 4   duration        92607 non-null  object        
 5   city_latitude   79569 non-null  float64       
 6   city_longitude  79569 non-null  float64       
 7   date            96774 non-null  datetime64[ns]
 8   time            96774 non-null  datetime64[ns]
dtypes: datetime64[ns](2), float64(2), object(5)
memory usage: 7.4+ MB


In [9]:
# Replacing NA values
values = {  'summary': 'observed', 
            'shape':'other',
            'duration':'unknown',
            'text':'observed'
            }
ufo_df2 = ufo_df.fillna(value=values)
ufo_df2.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 96774 entries, 0 to 98352
Data columns (total 9 columns):
 #   Column          Non-Null Count  Dtype         
---  ------          --------------  -----         
 0   summary         96774 non-null  object        
 1   city            96774 non-null  object        
 2   state           91394 non-null  object        
 3   shape           96774 non-null  object        
 4   duration        96774 non-null  object        
 5   city_latitude   79569 non-null  float64       
 6   city_longitude  79569 non-null  float64       
 7   date            96774 non-null  datetime64[ns]
 8   time            96774 non-null  datetime64[ns]
dtypes: datetime64[ns](2), float64(2), object(5)
memory usage: 7.4+ MB


In [10]:
# Change time column to day or night
def determine_time(time):
    h = time.hour
    return (
        "morning"
        if 5 <= h <= 11
        else "afternoon"
        if 12 <= h <= 17
        else "evening"
        if 18 <= h <= 22
        else "night"
    )

ufo_df2['time'] = ufo_df2.apply(lambda row: determine_time(row[8]), axis=1)

ufo_df2.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 96774 entries, 0 to 98352
Data columns (total 9 columns):
 #   Column          Non-Null Count  Dtype         
---  ------          --------------  -----         
 0   summary         96774 non-null  object        
 1   city            96774 non-null  object        
 2   state           91394 non-null  object        
 3   shape           96774 non-null  object        
 4   duration        96774 non-null  object        
 5   city_latitude   79569 non-null  float64       
 6   city_longitude  79569 non-null  float64       
 7   date            96774 non-null  datetime64[ns]
 8   time            96774 non-null  object        
dtypes: datetime64[ns](1), float64(2), object(6)
memory usage: 7.4+ MB


In [11]:
ufo_df2.head()

Unnamed: 0,summary,city,state,shape,duration,city_latitude,city_longitude,date,time
0,freaked me out,San Jose,CA,sphere,2 minutes,37.338842,-121.889706,2021-05-06,evening
3,"Over a course of 5 years, I have seen UFOs, ob...",Germantown,MD,formation,Years,39.154986,-77.272538,2005-07-06,afternoon
4,"Driving north on US 87 just outside Brady, Tex...",Brady,TX,light,Lights disappear,31.1451,-99.3478,2020-01-29,evening
5,meteor or space junk?,San Diego,CA,fireball,2 seconds,32.787229,-117.140268,2020-01-29,evening
6,"Shaped like a triangle, transparent like a lig...",Las Vegas,NV,triangle,30,36.141246,-115.186592,2020-01-28,afternoon


In [12]:
# Re-cast date column into object
ufo_df2['date'] = ufo_df2['date'].dt.strftime("%Y/%m/%d")
ufo_df2.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 96774 entries, 0 to 98352
Data columns (total 9 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   summary         96774 non-null  object 
 1   city            96774 non-null  object 
 2   state           91394 non-null  object 
 3   shape           96774 non-null  object 
 4   duration        96774 non-null  object 
 5   city_latitude   79569 non-null  float64
 6   city_longitude  79569 non-null  float64
 7   date            96774 non-null  object 
 8   time            96774 non-null  object 
dtypes: float64(2), object(7)
memory usage: 7.4+ MB


In [13]:
# Filter data to the past 40 years
# filtered_df = ufo_df2[ufo_df2['date'].dt.year > 1980]
# filtered_df.info()

In [14]:
# Drop UFO sightings with no coordinates 
ufo_df3 = ufo_df2.dropna(subset=['city_latitude', 'city_longitude'])

In [15]:
ufo_df3.reset_index(drop=True)
ufo_df3.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 79569 entries, 0 to 98352
Data columns (total 9 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   summary         79569 non-null  object 
 1   city            79569 non-null  object 
 2   state           79569 non-null  object 
 3   shape           79569 non-null  object 
 4   duration        79569 non-null  object 
 5   city_latitude   79569 non-null  float64
 6   city_longitude  79569 non-null  float64
 7   date            79569 non-null  object 
 8   time            79569 non-null  object 
dtypes: float64(2), object(7)
memory usage: 6.1+ MB


In [16]:
# Assign Country Code to Index
usa_id = pd.Series([840 for x in range(len(ufo_df3))])
ufo_df3['country_id'] = usa_id
ufo_df3 = ufo_df3.fillna(value=840)

usa_ufo_df = ufo_df3.set_index('country_id')
usa_ufo_df = usa_ufo_df.rename(columns={'city' : "city_name"})

usa_ufo_df

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  ufo_df3['country_id'] = usa_id


Unnamed: 0_level_0,summary,city_name,state,shape,duration,city_latitude,city_longitude,date,time
country_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
840.0,freaked me out,San Jose,CA,sphere,2 minutes,37.338842,-121.889706,2021/05/06,evening
840.0,"Over a course of 5 years, I have seen UFOs, ob...",Germantown,MD,formation,Years,39.154986,-77.272538,2005/07/06,afternoon
840.0,"Driving north on US 87 just outside Brady, Tex...",Brady,TX,light,Lights disappear,31.145100,-99.347800,2020/01/29,evening
840.0,meteor or space junk?,San Diego,CA,fireball,2 seconds,32.787229,-117.140268,2020/01/29,evening
840.0,"Shaped like a triangle, transparent like a lig...",Las Vegas,NV,triangle,30,36.141246,-115.186592,2020/01/28,afternoon
...,...,...,...,...,...,...,...,...,...
840.0,White bright quick light,St. Petersburg,FL,flash,~2 seconds,27.794515,-82.675160,2021/03/27,night
840.0,It’s really hard to explain... other than at f...,Moses lake,WA,unknown,30,47.190100,-119.307400,2021/03/27,night
840.0,Object in sky above me walking my dog. Came do...,Yorba linda,CA,light,unknown,33.891402,-117.777835,2021/03/27,night
840.0,Saw a fuzzy green orb moving at high speed in ...,Apex,NC,circle,1-2 seconds,35.717334,-78.880864,2021/03/28,evening


In [17]:
usa_ufo_df.info()

<class 'pandas.core.frame.DataFrame'>
Float64Index: 79569 entries, 840.0 to 840.0
Data columns (total 9 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   summary         79569 non-null  object 
 1   city_name       79569 non-null  object 
 2   state           79569 non-null  object 
 3   shape           79569 non-null  object 
 4   duration        79569 non-null  object 
 5   city_latitude   79569 non-null  float64
 6   city_longitude  79569 non-null  float64
 7   date            79569 non-null  object 
 8   time            79569 non-null  object 
dtypes: float64(2), object(7)
memory usage: 6.1+ MB


### Other Country UFO Sightings

In [18]:
other_ufo_df = ufo_df2[ufo_df2['state'].isnull()]
other_ufo_df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 5380 entries, 12 to 98344
Data columns (total 9 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   summary         5380 non-null   object 
 1   city            5380 non-null   object 
 2   state           0 non-null      object 
 3   shape           5380 non-null   object 
 4   duration        5380 non-null   object 
 5   city_latitude   0 non-null      float64
 6   city_longitude  0 non-null      float64
 7   date            5380 non-null   object 
 8   time            5380 non-null   object 
dtypes: float64(2), object(7)
memory usage: 420.3+ KB


In [19]:
other_ufo_df.head()

Unnamed: 0,summary,city,state,shape,duration,city_latitude,city_longitude,date,time
12,A series of soft white lights were traveling i...,Chennai (India),,light,10-15 minutes,,,2020/01/10,morning
84,Big UFO hovering over a house,Comalcalco (Mexico),,other,4:00,,,1980/06/01,afternoon
88,30+ orange balls playing in Australia,Hobart (Australia),,fireball,5 minutes,,,1982/03/16,evening
96,Simple triangle 40/50 yards across. Blue soft ...,Hahn A B (Germany),,triangle,7 minutes,,,1988/06/01,night
103,"Incredible bright light flooding the room, acc...","Chipping (small village, Lancashire)(UK/England)",,other,3-30 minutes,,,1990/06/01,night


In [20]:
# Split city column to obtain country
other_ufo_df[['new1', 'new2', 'new3']] = other_ufo_df['city'].str.split( '(' , expand=True, n=2)

# Drop Columns with no city and country combination
other_ufo_df = other_ufo_df.dropna(subset=['new2'])
other_ufo_df = other_ufo_df.drop(other_ufo_df[other_ufo_df['new3'].notnull()].index)
other_ufo_df.drop('new3', axis=1, inplace=True)

other_ufo_df.head()


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self[k1] = value[k2]


Unnamed: 0,summary,city,state,shape,duration,city_latitude,city_longitude,date,time,new1,new2
12,A series of soft white lights were traveling i...,Chennai (India),,light,10-15 minutes,,,2020/01/10,morning,Chennai,India)
84,Big UFO hovering over a house,Comalcalco (Mexico),,other,4:00,,,1980/06/01,afternoon,Comalcalco,Mexico)
88,30+ orange balls playing in Australia,Hobart (Australia),,fireball,5 minutes,,,1982/03/16,evening,Hobart,Australia)
96,Simple triangle 40/50 yards across. Blue soft ...,Hahn A B (Germany),,triangle,7 minutes,,,1988/06/01,night,Hahn A B,Germany)
123,Cube of Fire,Stoke on Trent (UK/England),,fireball,4 minutes,,,1999/04/04,evening,Stoke on Trent,UK/England)


In [21]:
# Rename columns and drop unnecessary column
other_ufo_df.drop('city', axis=1, inplace=True)
other_ufo_df = other_ufo_df.rename(columns={"new1": "city_name", "new2": "country_name"})
other_ufo_df['country_name'] = other_ufo_df['country_name'].str.replace( ')','', regex=True)
#other_ufo_df['city_name'] = other_ufo_df['city_name'].str.replace( ' ','', regex=True)

other_ufo_df.head()

Unnamed: 0,summary,state,shape,duration,city_latitude,city_longitude,date,time,city_name,country_name
12,A series of soft white lights were traveling i...,,light,10-15 minutes,,,2020/01/10,morning,Chennai,India
84,Big UFO hovering over a house,,other,4:00,,,1980/06/01,afternoon,Comalcalco,Mexico
88,30+ orange balls playing in Australia,,fireball,5 minutes,,,1982/03/16,evening,Hobart,Australia
96,Simple triangle 40/50 yards across. Blue soft ...,,triangle,7 minutes,,,1988/06/01,night,Hahn A B,Germany
123,Cube of Fire,,fireball,4 minutes,,,1999/04/04,evening,Stoke on Trent,UK/England


In [22]:
# Replacing NA values
values = {  'state': 'unknown'
            }
other_ufo_df = other_ufo_df.fillna(value=values)
other_ufo_df.head()

Unnamed: 0,summary,state,shape,duration,city_latitude,city_longitude,date,time,city_name,country_name
12,A series of soft white lights were traveling i...,unknown,light,10-15 minutes,,,2020/01/10,morning,Chennai,India
84,Big UFO hovering over a house,unknown,other,4:00,,,1980/06/01,afternoon,Comalcalco,Mexico
88,30+ orange balls playing in Australia,unknown,fireball,5 minutes,,,1982/03/16,evening,Hobart,Australia
96,Simple triangle 40/50 yards across. Blue soft ...,unknown,triangle,7 minutes,,,1988/06/01,night,Hahn A B,Germany
123,Cube of Fire,unknown,fireball,4 minutes,,,1999/04/04,evening,Stoke on Trent,UK/England


In [23]:
other_ufo_df.drop('city_latitude', axis=1, inplace=True)
other_ufo_df.drop('city_longitude', axis=1, inplace=True)

### Combining with other city and country data

In [25]:
# Importing csv files
country_path = "static/Resources/country_abb.CSV"
country_data = pd.read_csv(country_path, error_bad_lines=False)

b'Skipping line 67: expected 16 fields, saw 17\nSkipping line 126: expected 16 fields, saw 17\nSkipping line 127: expected 16 fields, saw 17\n'


In [26]:
# Dropping unwanted columns
cleaned_country_df = pd.DataFrame(country_data, columns = {"Country or Area", "M49 Code", "ISO-alpha2 Code"})
cleaned_country_df

Unnamed: 0,ISO-alpha2 Code,Country or Area,M49 Code
0,DZ,Algeria,12
1,EG,Egypt,818
2,LY,Libya,434
3,MA,Morocco,504
4,SD,Sudan,729
...,...,...,...
241,WS,Samoa,882
242,TK,Tokelau,772
243,TO,Tonga,776
244,TV,Tuvalu,798


In [27]:
cleaned_country_df = cleaned_country_df.rename(columns = {"Country or Area": "country_name",
                                                          "M49 Code":"country_id",
                                                          "ISO-alpha2 Code": "country_abb"})
cleaned_country_df

Unnamed: 0,country_abb,country_name,country_id
0,DZ,Algeria,12
1,EG,Egypt,818
2,LY,Libya,434
3,MA,Morocco,504
4,SD,Sudan,729
...,...,...,...
241,WS,Samoa,882
242,TK,Tokelau,772
243,TO,Tonga,776
244,TV,Tuvalu,798


In [29]:
# Importing csv files
cities_path = "static/Resources/world_cities.CSV"
cities_df = pd.read_csv(cities_path)

In [30]:
cleaned_cities_df = cities_df.rename(columns = {"Country": "country_abb",
                                                          "City":"city_name",
                                                          "Latitude": "city_latitude",
                                                          "Longitude": "city_longitude"})


cleaned_cities_df['country_abb'] = cleaned_cities_df['country_abb'].str.upper()
cleaned_cities_df['city_name'] = cleaned_cities_df['city_name'].str.title()

cleaned_cities_df

Unnamed: 0,country_abb,city_name,city_latitude,city_longitude
0,AD,Andorra La Vella,42.500000,1.516667
1,AD,Canillo,42.566667,1.600000
2,AD,Encamp,42.533333,1.583333
3,AD,La Massana,42.550000,1.516667
4,AD,Les Escaldes,42.500000,1.533333
...,...,...,...,...
46827,ZW,Redcliffe,-19.033333,29.783333
46828,ZW,Rusape,-18.533333,32.116667
46829,ZW,Shurugwi,-19.666667,30.000000
46830,ZW,Victoria Falls,-17.933333,25.833333


In [31]:

other_ufo_df["country_name"].replace({"Usa" : "United States of America", 
                                                         "South Korea" : "Republic of Korea", 
                                                         "Macedonia" :"North Macedonia", 
                                                         "Russia" : "Russian Federation",
                                                        "Venezuela": "Venezuela (Bolivarian Republic of)",
                                                        "Iran": "Iran (Islamic Republic of)",
                                                        "Taiwan": "China",
                                                        "Usa Territory": "United States of America",
                                                        "Myanmar (Burma)": "Myanmar",
                                                        "Kermadec Islands (New Zealand)": "New Zealand",
                                                        "Bosnia-Herzegovina": "Bosnia and Herzegovina",
                                                        "South Georgia And The South Sandwich Islands": "South Georgia and the South Sandwich Islands",
                                                        "Tanzania": "United Republic of Tanzania",
                                                        "Uk" : "United Kingdom of Great Britain and Northern Ireland",
                                                        "Uk Territory" : "United Kingdom of Great Britain and Northern Ireland",
                                                        "UK/England" : "United Kingdom of Great Britain and Northern Ireland",
                                                        "UK/Scotland" : "United Kingdom of Great Britain and Northern Ireland",
                                                        "UK/Wales" : "United Kingdom of Great Britain and Northern Ireland",
                                                        "West Germany" : "Germany",
                                                        "South Australia" : "Australia",
                                                        "Republic of Ireland" : "Ireland",
                                                        "Brunei" : "Brunei Darussalam",
                                                        "Burma" : "Myanmar"}, inplace=True)


other_ufo_df.head()

Unnamed: 0,summary,state,shape,duration,date,time,city_name,country_name
12,A series of soft white lights were traveling i...,unknown,light,10-15 minutes,2020/01/10,morning,Chennai,India
84,Big UFO hovering over a house,unknown,other,4:00,1980/06/01,afternoon,Comalcalco,Mexico
88,30+ orange balls playing in Australia,unknown,fireball,5 minutes,1982/03/16,evening,Hobart,Australia
96,Simple triangle 40/50 yards across. Blue soft ...,unknown,triangle,7 minutes,1988/06/01,night,Hahn A B,Germany
123,Cube of Fire,unknown,fireball,4 minutes,1999/04/04,evening,Stoke on Trent,United Kingdom of Great Britain and Northern I...


In [32]:
# Merge main_other_df with country_df for the ID and abb
city_data = cleaned_cities_df.merge(cleaned_country_df, on="country_abb", how="inner")
city_data


Unnamed: 0,country_abb,city_name,city_latitude,city_longitude,country_name,country_id
0,AD,Andorra La Vella,42.500000,1.516667,Andorra,20
1,AD,Canillo,42.566667,1.600000,Andorra,20
2,AD,Encamp,42.533333,1.583333,Andorra,20
3,AD,La Massana,42.550000,1.516667,Andorra,20
4,AD,Les Escaldes,42.500000,1.533333,Andorra,20
...,...,...,...,...,...,...
46695,ZW,Redcliffe,-19.033333,29.783333,Zimbabwe,716
46696,ZW,Rusape,-18.533333,32.116667,Zimbabwe,716
46697,ZW,Shurugwi,-19.666667,30.000000,Zimbabwe,716
46698,ZW,Victoria Falls,-17.933333,25.833333,Zimbabwe,716


In [33]:
other_ufo_df['city_name'] = other_ufo_df['city_name'].str.rstrip()
other_ufo_df.head()

Unnamed: 0,summary,state,shape,duration,date,time,city_name,country_name
12,A series of soft white lights were traveling i...,unknown,light,10-15 minutes,2020/01/10,morning,Chennai,India
84,Big UFO hovering over a house,unknown,other,4:00,1980/06/01,afternoon,Comalcalco,Mexico
88,30+ orange balls playing in Australia,unknown,fireball,5 minutes,1982/03/16,evening,Hobart,Australia
96,Simple triangle 40/50 yards across. Blue soft ...,unknown,triangle,7 minutes,1988/06/01,night,Hahn A B,Germany
123,Cube of Fire,unknown,fireball,4 minutes,1999/04/04,evening,Stoke on Trent,United Kingdom of Great Britain and Northern I...


In [34]:
# Merge again for city data
merged_df = other_ufo_df.merge(city_data, on=['country_name',"city_name"], how="inner")
merged_df = merged_df.set_index('country_id')

### Preparing to export to CSV

### NOTE: Due to time constraint, there will be missing US cities in the city table

In [35]:
usa_ufo_df.head()

Unnamed: 0_level_0,summary,city_name,state,shape,duration,city_latitude,city_longitude,date,time
country_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
840.0,freaked me out,San Jose,CA,sphere,2 minutes,37.338842,-121.889706,2021/05/06,evening
840.0,"Over a course of 5 years, I have seen UFOs, ob...",Germantown,MD,formation,Years,39.154986,-77.272538,2005/07/06,afternoon
840.0,"Driving north on US 87 just outside Brady, Tex...",Brady,TX,light,Lights disappear,31.1451,-99.3478,2020/01/29,evening
840.0,meteor or space junk?,San Diego,CA,fireball,2 seconds,32.787229,-117.140268,2020/01/29,evening
840.0,"Shaped like a triangle, transparent like a lig...",Las Vegas,NV,triangle,30,36.141246,-115.186592,2020/01/28,afternoon


In [36]:
merged_df.head()

Unnamed: 0_level_0,summary,state,shape,duration,date,time,city_name,country_name,country_abb,city_latitude,city_longitude
country_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
484,Big UFO hovering over a house,unknown,other,4:00,1980/06/01,afternoon,Comalcalco,Mexico,MX,18.266667,-93.216667
36,30+ orange balls playing in Australia,unknown,fireball,5 minutes,1982/03/16,evening,Hobart,Australia,AU,-42.883209,147.331665
36,Strange orange light in Australia.,unknown,light,1 minute,2013/05/06,night,Hobart,Australia,AU,-42.883209,147.331665
826,rectangle shinny glass like blue box in the sk...,unknown,rectangle,1 hour,2009/06/30,evening,Oxford,United Kingdom of Great Britain and Northern I...,GB,51.75,-1.25
826,A Sphere in the sky that disappeared in a matt...,unknown,sphere,20 seconds,1986/06/15,afternoon,Oxford,United Kingdom of Great Britain and Northern I...,GB,51.75,-1.25


In [37]:
sightings_df = pd.concat([merged_df, usa_ufo_df])
sightings_df.head()

Unnamed: 0_level_0,summary,state,shape,duration,date,time,city_name,country_name,country_abb,city_latitude,city_longitude
country_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
484.0,Big UFO hovering over a house,unknown,other,4:00,1980/06/01,afternoon,Comalcalco,Mexico,MX,18.266667,-93.216667
36.0,30+ orange balls playing in Australia,unknown,fireball,5 minutes,1982/03/16,evening,Hobart,Australia,AU,-42.883209,147.331665
36.0,Strange orange light in Australia.,unknown,light,1 minute,2013/05/06,night,Hobart,Australia,AU,-42.883209,147.331665
826.0,rectangle shinny glass like blue box in the sk...,unknown,rectangle,1 hour,2009/06/30,evening,Oxford,United Kingdom of Great Britain and Northern I...,GB,51.75,-1.25
826.0,A Sphere in the sky that disappeared in a matt...,unknown,sphere,20 seconds,1986/06/15,afternoon,Oxford,United Kingdom of Great Britain and Northern I...,GB,51.75,-1.25


In [38]:
sightings_df.drop('country_name', axis=1, inplace=True)
sightings_df.drop('country_abb', axis=1, inplace=True)
sightings_df = sightings_df.rename(columns = {'city_name' : 'city'})
#sightings_df.sort_index()
sightings_df.head()

Unnamed: 0_level_0,summary,state,shape,duration,date,time,city,city_latitude,city_longitude
country_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
484.0,Big UFO hovering over a house,unknown,other,4:00,1980/06/01,afternoon,Comalcalco,18.266667,-93.216667
36.0,30+ orange balls playing in Australia,unknown,fireball,5 minutes,1982/03/16,evening,Hobart,-42.883209,147.331665
36.0,Strange orange light in Australia.,unknown,light,1 minute,2013/05/06,night,Hobart,-42.883209,147.331665
826.0,rectangle shinny glass like blue box in the sk...,unknown,rectangle,1 hour,2009/06/30,evening,Oxford,51.75,-1.25
826.0,A Sphere in the sky that disappeared in a matt...,unknown,sphere,20 seconds,1986/06/15,afternoon,Oxford,51.75,-1.25


In [39]:
cleaned_country_df = cleaned_country_df.set_index("country_id")
cleaned_country_df = cleaned_country_df.rename(columns = {'country_abb' : 'country_abbreviation'})

In [40]:
# x = cleaned_country_df[cleaned_country_df['country_id'] == " Sint Eustatius and Saba"]
# x

In [38]:
cleaned_country_df.head()

Unnamed: 0_level_0,country_name,country_abbreviation
country_id,Unnamed: 1_level_1,Unnamed: 2_level_1
12,Algeria,DZ
818,Egypt,EG
434,Libya,LY
504,Morocco,MA
729,Sudan,SD


In [41]:
# Save as CSV
cleaned_country_df.to_csv("static/Resources/cleaned_country_data.CSV", index = True)
sightings_df.to_csv("static/Resources/sightings_data.CSV", index = True)