# Build a Web App to Use an ML Model

In [2]:
import pandas as pd
import numpy as np

# Read the raw UFO sightings table from the CSV file next to this notebook.
ufos = pd.read_csv('./ufos.csv')

# Preview the first five rows to see which columns are available.
ufos.head(n=5)

Unnamed: 0,datetime,city,state,country,shape,duration (seconds),duration (hours/min),comments,date posted,latitude,longitude
0,10/10/1949 20:30,san marcos,tx,us,cylinder,2700.0,45 minutes,This event took place in early fall around 194...,4/27/2004,29.883056,-97.941111
1,10/10/1949 21:00,lackland afb,tx,,light,7200.0,1-2 hrs,1949 Lackland AFB&#44 TX. Lights racing acros...,12/16/2005,29.38421,-98.581082
2,10/10/1955 17:00,chester (uk/england),,gb,circle,20.0,20 seconds,Green/Orange circular disc over Chester&#44 En...,1/21/2008,53.2,-2.916667
3,10/10/1956 21:00,edna,tx,us,circle,20.0,1/2 hour,My older brother and twin sister were leaving ...,1/17/2004,28.978333,-96.645833
4,10/10/1960 20:00,kaneohe,hi,us,light,900.0,15 minutes,AS a Marine 1st Lt. flying an FJ4B fighter/att...,1/22/2004,21.418056,-157.803611


Convert the ufos data to a small dataframe with fresh column titles. Check the unique values in the `Country` field.

In [3]:
# Map each raw column we keep to its new, shorter title.
column_titles = {
    'duration (seconds)': 'Seconds',
    'country': 'Country',
    'latitude': 'Latitude',
    'longitude': 'Longitude',
}

# Select just those four columns (in this order) and rename them.
ufos = ufos[list(column_titles)].rename(columns=column_titles)

# Distinct values found in the Country column.
ufos['Country'].unique()

array(['us', nan, 'gb', 'ca', 'au', 'de'], dtype=object)

In [None]:
# Count the missing values in each column.
ufos.isna().sum(axis='index')

Seconds         0
Country      9670
Latitude        0
Longitude       0
dtype: int64

In [None]:
# Drop every row that has a missing value in any column.
# Reassigning (instead of inplace=True) keeps the step explicit and chain-friendly.
ufos = ufos.dropna()

# Keep only sightings that lasted between 1 and 60 seconds (both bounds inclusive).
# .copy() makes the filtered result an independent frame, so assigning to one of
# its columns later does not raise pandas' SettingWithCopyWarning.
ufos = ufos[ufos['Seconds'].between(1, 60)].copy()

ufos.info()

Convert the text values for countries to numbers.
`LabelEncoder` assigns the integer IDs in alphabetical order of the labels.

In [6]:
from sklearn.preprocessing import LabelEncoder

# Keep a reference to the fitted encoder: country_encoder.classes_ and
# country_encoder.inverse_transform() translate the numeric IDs back into
# the original country codes when predictions need to be shown to a user.
country_encoder = LabelEncoder()

# assign() returns a new frame with the Country column replaced by its integer
# IDs, which avoids writing into a frame that may be a slice of another one.
ufos = ufos.assign(Country=country_encoder.fit_transform(ufos['Country']))

ufos.head()

Unnamed: 0,Seconds,Country,Latitude,Longitude
0,2700.0,4,29.883056,-97.941111
1,7200.0,5,29.38421,-98.581082
2,20.0,3,53.2,-2.916667
3,20.0,4,28.978333,-96.645833
4,900.0,4,21.418056,-157.803611


Select the three features to train on as the X matrix; the y vector will be the Country. The goal is to input Seconds, Latitude and Longitude and get a country ID in return.

In [7]:
from sklearn.model_selection import train_test_split

# Model inputs: the three numeric columns. Target: the encoded Country column.
Selected_features = ['Seconds', 'Latitude', 'Longitude']
X, y = ufos[Selected_features], ufos['Country']

# Hold out 20% of the rows for evaluation; the fixed random_state makes the
# split repeatable from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=0,
)

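Train your model using logistic regression. The features are standardized first, with the scaler fitted on the training split only and then reused for the test split.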

In [11]:
from sklearn.metrics import accuracy_score, classification_report
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler

# Standardize the features. The scaler is fitted on the training split only and
# then applied unchanged to the test split, so no test statistics leak into training.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Fit the classifier on the scaled training data and predict the held-out rows.
model = LogisticRegression(max_iter=1000)
model.fit(X_train_scaled, y_train)
predictions = model.predict(X_test_scaled)

# zero_division=0 reports 0.0 for classes that never appear among the predictions
# instead of emitting an UndefinedMetricWarning for each averaged metric.
print(classification_report(y_test, predictions, zero_division=0))
print('Predicted labels: ', predictions)
print('Accuracy: ', accuracy_score(y_test, predictions))

              precision    recall  f1-score   support

           0       0.73      0.92      0.82       105
           1       0.11      0.01      0.02       649
           2       0.00      0.00      0.00        24
           3       0.69      1.00      0.81       428
           4       0.87      0.99      0.93     12945
           5       0.97      0.17      0.30      1916

    accuracy                           0.86     16067
   macro avg       0.56      0.52      0.48     16067
weighted avg       0.84      0.86      0.81     16067

Predicted labels:  [4 3 4 ... 4 4 4]
Accuracy:  0.8557291342503267


  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])


Pickle the model! You can do that in a few lines of code. Once it's pickled, load the pickled model and test it against a sample data array containing values for seconds, latitude, and longitude.

In [12]:
import pickle

from sklearn.pipeline import make_pipeline

model_filename = 'ufo-model.pkl'

# The classifier was fitted on standardized features, so it must never be fed
# raw values. Bundle the already-fitted scaler and classifier into one pipeline
# and pickle that: whoever loads the file can call .predict() directly on raw
# (seconds, latitude, longitude) values and the scaling is applied automatically.
# `model` itself is left untouched so this cell can be re-run safely.
ufo_pipeline = make_pipeline(scaler, model)

# Context managers guarantee the file handles are flushed and closed.
with open(model_filename, 'wb') as model_file:
    pickle.dump(ufo_pipeline, model_file)

# NOTE: pickle.load can execute arbitrary code; only load files you created or trust.
with open(model_filename, 'rb') as model_file:
    loaded_model = pickle.load(model_file)

# Smoke test with one raw sample: 50 seconds, latitude 44, longitude 12.
# Using the training column names avoids scikit-learn's feature-name warning.
sample = pd.DataFrame([[50, 44, 12]], columns=Selected_features)
print(loaded_model.predict(sample))

[3]
