In [7]:
import pandas as pd
import numpy as np

# Read the UFO dataset from the CSV file under ./data (path is relative to
# the notebook's working directory).
ufos = pd.read_csv('./data/ufos.csv')
# Preview the first five rows to check which columns were parsed.
ufos.head()


Unnamed: 0,datetime,city,state,country,shape,duration (seconds),duration (hours/min),comments,date posted,latitude,longitude
0,10/10/1949 20:30,san marcos,tx,us,cylinder,2700.0,45 minutes,This event took place in early fall around 194...,4/27/2004,29.883056,-97.941111
1,10/10/1949 21:00,lackland afb,tx,,light,7200.0,1-2 hrs,1949 Lackland AFB&#44 TX. Lights racing acros...,12/16/2005,29.38421,-98.581082
2,10/10/1955 17:00,chester (uk/england),,gb,circle,20.0,20 seconds,Green/Orange circular disc over Chester&#44 En...,1/21/2008,53.2,-2.916667
3,10/10/1956 21:00,edna,tx,us,circle,20.0,1/2 hour,My older brother and twin sister were leaving ...,1/17/2004,28.978333,-96.645833
4,10/10/1960 20:00,kaneohe,hi,us,light,900.0,15 minutes,AS a Marine 1st Lt. flying an FJ4B fighter/att...,1/22/2004,21.418056,-157.803611


In [8]:
# Keep only the four columns used for modelling and give them short,
# title-case names.
ufos = ufos[['duration (seconds)', 'country', 'latitude', 'longitude']].rename(
    columns={
        'duration (seconds)': 'Seconds',
        'country': 'Country',
        'latitude': 'Latitude',
        'longitude': 'Longitude',
    }
)

# List the distinct country codes; missing values show up as NaN.
ufos['Country'].unique()

array(['us', nan, 'gb', 'ca', 'au', 'de'], dtype=object)

In [9]:
# (rows, columns) of the trimmed frame, before any rows are dropped in the next cell.
ufos.shape

(80332, 4)

In [10]:
# Drop every row that has a missing value in any of the four columns.
# Rebinding the name (instead of inplace=True) leaves the frame built in the
# previous cell untouched.
ufos = ufos.dropna()

# Keep sightings that lasted between 1 and 60 seconds (both bounds inclusive).
# .copy() makes the result an independent frame, so the later assignment to
# ufos['Country'] writes to real data rather than to a slice of the unfiltered
# frame (which can trigger pandas' SettingWithCopyWarning).
ufos = ufos[ufos['Seconds'].between(1, 60)].copy()

# Summarise the remaining row count, dtypes and null counts.
ufos.info()

<class 'pandas.core.frame.DataFrame'>
Index: 25863 entries, 2 to 80330
Data columns (total 4 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   Seconds    25863 non-null  float64
 1   Country    25863 non-null  object 
 2   Latitude   25863 non-null  float64
 3   Longitude  25863 non-null  float64
dtypes: float64(3), object(1)
memory usage: 1010.3+ KB


In [11]:
# Label-encode the categorical Country column
from sklearn.preprocessing import LabelEncoder
# Learn the mapping from country code to integer label, then overwrite the
# Country column with those integers. The fitted encoder is kept so the
# integers can be mapped back to country codes later.
# Note: this cell is not idempotent. Running it a second time would refit the
# encoder on the integer labels instead of the original country strings.
encode = LabelEncoder().fit(ufos['Country'])
ufos['Country'] = encode.transform(ufos['Country'])

ufos.head(10)

Unnamed: 0,Seconds,Country,Latitude,Longitude
2,20.0,3,53.2,-2.916667
3,20.0,4,28.978333,-96.645833
14,30.0,4,35.823889,-80.253611
23,60.0,4,45.582778,-122.352222
24,3.0,3,51.783333,-0.783333
25,30.0,4,29.423889,-98.493333
26,30.0,4,38.254167,-85.759444
36,60.0,4,29.763056,-95.363056
38,20.0,4,41.033889,-73.763333
43,60.0,4,40.015,-105.27


In [12]:
# Inspect the last five rows as well, to see the encoded Country values at the end of the frame.
ufos.tail()

Unnamed: 0,Seconds,Country,Latitude,Longitude
80320,60.0,4,33.209722,-87.569167
80321,3.0,4,36.529722,-87.359444
80323,60.0,4,29.651389,-82.325
80326,20.0,4,34.101389,-84.519444
80330,5.0,4,38.901111,-77.265556


### Model - To Predict the Country

In [15]:
from sklearn.model_selection import train_test_split

# Features: sighting duration and coordinates. Target: the encoded country.
X = ufos.loc[:, ['Seconds', 'Latitude', 'Longitude']]
y = ufos.loc[:, 'Country']

# Hold out 25% of the rows for evaluation; the fixed seed keeps the split
# reproducible between runs.
# NOTE(review): the split is not stratified, and the classification report
# further down shows very uneven class support. Consider passing stratify=y.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.25,
    random_state=95,
)

In [17]:
# Report how many rows ended up in each partition, as (train, test).
split_sizes = (len(X_train), len(X_test))
print("Train-Test Size: ", split_sizes)

Train-Test Size:  (19397, 6466)


### Logistic Regression

In [20]:
from sklearn.linear_model import LogisticRegression
# Fit a logistic-regression classifier on the training split (fit() returns
# the estimator itself, so construction and fitting can be chained).
# NOTE(review): the target has five classes; recent scikit-learn releases
# reportedly deprecate the liblinear solver for multiclass problems. Confirm
# against the installed version.
model = LogisticRegression(max_iter=500, solver='liblinear').fit(X_train, y_train)

# Predicted integer country labels for the held-out rows.
pred = model.predict(X_test)


In [23]:
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Overall accuracy on the held-out rows, rounded to three decimals.
acc = round(accuracy_score(y_test, pred), 3)

# Let the format spec do the percentage conversion; multiplying the float by
# 100 inside the f-string can print artefacts such as 94.89999999999999%.
print(f"Accuracy: {acc:.1%}")

# Print the label on its own line so the report's header row stays aligned
# with its columns. zero_division=0 reports 0 (without a warning) for classes
# that were never predicted.
print("Classification report:")
print(classification_report(y_test, pred, zero_division=0))

Accuracy: 95.0%
Classification report:                precision    recall  f1-score   support

           0       1.00      1.00      1.00        58
           1       1.00      0.01      0.03       315
           2       0.00      0.00      0.00        11
           3       0.95      1.00      0.97       192
           4       0.95      1.00      0.97      5890

    accuracy                           0.95      6466
   macro avg       0.78      0.60      0.59      6466
weighted avg       0.95      0.95      0.93      6466



In [24]:
# Display the raw predictions: integer labels in the encoder's numbering.
pred

array([4, 4, 4, ..., 4, 4, 4], shape=(6466,))

In [25]:
# Map the integer predictions back to the original country codes using the fitted encoder.
encode.inverse_transform(pred)

array(['us', 'us', 'us', ..., 'us', 'us', 'us'],
      shape=(6466,), dtype=object)

In [26]:
import pickle
# Persist the fitted model to disk. The context manager guarantees the file
# is flushed and closed before any later cell tries to read it back.
model_filename = 'ufo-logistic.pkl'
with open(model_filename, 'wb') as model_file:
    pickle.dump(model, model_file)


In [30]:
# Reload the pickled model to confirm it survives a save/load round-trip.
# NOTE: pickle.load can execute arbitrary code; only load files you created.
with open('ufo-logistic.pkl', 'rb') as model_file:
    model2 = pickle.load(model_file)

# Predict with the *reloaded* model (not the in-memory `model`), otherwise the
# round-trip is never actually exercised. The sample is wrapped in a DataFrame
# with the training column names so scikit-learn does not warn about missing
# feature names.
sample = pd.DataFrame(
    [[50, 44, -12]],
    columns=['Seconds', 'Latitude', 'Longitude'],
)
model2.predict(sample)



array([3])