In [24]:
import pandas as pd
import numpy as np

ufos = pd.read_csv('data/ufos.csv')
ufos.head()

Unnamed: 0,datetime,city,state,country,shape,duration (seconds),duration (hours/min),comments,date posted,latitude,longitude
0,10/10/1949 20:30,san marcos,tx,us,cylinder,2700.0,45 minutes,This event took place in early fall around 194...,4/27/2004,29.883056,-97.941111
1,10/10/1949 21:00,lackland afb,tx,,light,7200.0,1-2 hrs,1949 Lackland AFB&#44 TX. Lights racing acros...,12/16/2005,29.38421,-98.581082
2,10/10/1955 17:00,chester (uk/england),,gb,circle,20.0,20 seconds,Green/Orange circular disc over Chester&#44 En...,1/21/2008,53.2,-2.916667
3,10/10/1956 21:00,edna,tx,us,circle,20.0,1/2 hour,My older brother and twin sister were leaving ...,1/17/2004,28.978333,-96.645833
4,10/10/1960 20:00,kaneohe,hi,us,light,900.0,15 minutes,AS a Marine 1st Lt. flying an FJ4B fighter/att...,1/22/2004,21.418056,-157.803611


In [25]:
ufos = pd.DataFrame({'Seconds': ufos['duration (seconds)'], 'Country': ufos['country'],'Latitude': ufos['latitude'],'Longitude': ufos['longitude']})


In [26]:
ufos.isna().sum()

Seconds         0
Country      9670
Latitude        0
Longitude       0
dtype: int64

In [27]:
ufos.dropna(inplace=True)

In [28]:
ufos.Country.unique()

array(['us', 'gb', 'ca', 'au', 'de'], dtype=object)

In [29]:
from sklearn.preprocessing import LabelEncoder
ufos.Country = LabelEncoder().fit_transform(ufos.Country)
ufos.head()

Unnamed: 0,Seconds,Country,Latitude,Longitude
0,2700.0,4,29.883056,-97.941111
2,20.0,3,53.2,-2.916667
3,20.0,4,28.978333,-96.645833
4,900.0,4,21.418056,-157.803611
5,300.0,4,36.595,-82.188889


In [30]:
ufos.info

<bound method DataFrame.info of        Seconds  Country   Latitude   Longitude
0       2700.0        4  29.883056  -97.941111
2         20.0        3  53.200000   -2.916667
3         20.0        4  28.978333  -96.645833
4        900.0        4  21.418056 -157.803611
5        300.0        4  36.595000  -82.188889
...        ...      ...        ...         ...
80327    600.0        4  36.165833  -86.784444
80328   1200.0        4  43.613611 -116.202500
80329   1200.0        4  38.297222 -122.284444
80330      5.0        4  38.901111  -77.265556
80331   1020.0        4  35.652778  -97.477778

[70662 rows x 4 columns]>

In [34]:
ufos = ufos[(ufos['Seconds']>=1) & (ufos['Seconds']<=60)]
ufos.info

<bound method DataFrame.info of        Seconds  Country   Latitude   Longitude
2         20.0        3  53.200000   -2.916667
3         20.0        4  28.978333  -96.645833
14        30.0        4  35.823889  -80.253611
23        60.0        4  45.582778 -122.352222
24         3.0        3  51.783333   -0.783333
...        ...      ...        ...         ...
80320     60.0        4  33.209722  -87.569167
80321      3.0        4  36.529722  -87.359444
80323     60.0        4  29.651389  -82.325000
80326     20.0        4  34.101389  -84.519444
80330      5.0        4  38.901111  -77.265556

[25863 rows x 4 columns]>

Creating training and test dataset

In [35]:
from sklearn.model_selection import train_test_split
features = ['Seconds', 'Latitude', 'Longitude']
X = ufos[features]
y = ufos['Country']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

Creating Logistic regression model

In [39]:
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, accuracy_score

model = LogisticRegression(max_iter=1000)
model.fit(X_train, y_train)
predictions = model.predict(X_test)
print(classification_report(y_test, predictions))
print('Predictions', predictions)
print("Accuracy score", accuracy_score(y_test, predictions))

              precision    recall  f1-score   support

           0       1.00      1.00      1.00        41
           1       0.85      0.46      0.60       250
           2       1.00      1.00      1.00         8
           3       1.00      1.00      1.00       131
           4       0.97      1.00      0.98      4743

    accuracy                           0.97      5173
   macro avg       0.96      0.89      0.92      5173
weighted avg       0.97      0.97      0.97      5173

Predictions [4 4 4 ... 3 4 4]
Accuracy score 0.970036729170694


Pickle our model

In [40]:
import pickle
filename = 'ufo-model.pkl'
pickle.dump(model, open(filename, 'wb'))

In [41]:
pklmodel = pickle.load(open('ufo-model.pkl', 'rb'))
pklmodel.predict([[50, 44, -12]])

array([3])