# Building a Web App using a ML Model

In [1]:
# Preprocessing 
import pandas as pd
import numpy as np
from numpy import float64
from sklearn.preprocessing import LabelEncoder


# Transforming Data
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

#Running the model
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report
from sklearn.linear_model import LogisticRegression



## Cleaning Data

In [2]:
ufos = pd.read_csv('./Data/scrubbed.csv')
ufos.head()

  ufos = pd.read_csv('./Data/scrubbed.csv')


Unnamed: 0,datetime,city,state,country,shape,duration (seconds),duration (hours/min),comments,date posted,latitude,longitude
0,10/10/1949 20:30,san marcos,tx,us,cylinder,2700,45 minutes,This event took place in early fall around 194...,4/27/2004,29.8830556,-97.941111
1,10/10/1949 21:00,lackland afb,tx,,light,7200,1-2 hrs,1949 Lackland AFB&#44 TX. Lights racing acros...,12/16/2005,29.38421,-98.581082
2,10/10/1955 17:00,chester (uk/england),,gb,circle,20,20 seconds,Green/Orange circular disc over Chester&#44 En...,1/21/2008,53.2,-2.916667
3,10/10/1956 21:00,edna,tx,us,circle,20,1/2 hour,My older brother and twin sister were leaving ...,1/17/2004,28.9783333,-96.645833
4,10/10/1960 20:00,kaneohe,hi,us,light,900,15 minutes,AS a Marine 1st Lt. flying an FJ4B fighter/att...,1/22/2004,21.4180556,-157.803611


In [3]:
#Selecting the columns I need
ufos = pd.DataFrame({'Seconds': ufos['duration (seconds)'], 'Country': ufos['country'],'Latitude': ufos['latitude'],'Longitude': ufos['longitude ']})


In [4]:
ufos.isnull().sum()

Seconds         0
Country      9670
Latitude        0
Longitude       0
dtype: int64

In [5]:
#Comverting Latitude to float
ufos['Latitude']= ufos['Latitude'].drop([43782], axis=0)
ufos['Latitude'] = pd.to_numeric(ufos['Latitude'], errors = 'raise')


In [6]:
#Comverting Seconds to float
ufos['Seconds']= ufos['Seconds'].replace('2`','2')
ufos['Seconds']= ufos['Seconds'].replace('8`','8')
ufos['Seconds']= ufos['Seconds'].replace('0.5`','0.5')
ufos['Seconds'] = pd.to_numeric(ufos['Seconds'], errors = 'raise')

In [7]:
#Taking the range of seconds from 1 to 60
ufos.dropna(inplace=True)
ufos = ufos[(ufos['Seconds'] >= 1) & (ufos['Seconds'] <= 60)]

ufos.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 25863 entries, 2 to 80330
Data columns (total 4 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   Seconds    25863 non-null  float64
 1   Country    25863 non-null  object 
 2   Latitude   25863 non-null  float64
 3   Longitude  25863 non-null  float64
dtypes: float64(3), object(1)
memory usage: 1010.3+ KB


In [8]:
#Encoding Country

ufos['Country'] = LabelEncoder().fit_transform(ufos['Country'])

ufos.head()

Unnamed: 0,Seconds,Country,Latitude,Longitude
2,20.0,3,53.2,-2.916667
3,20.0,4,28.978333,-96.645833
14,30.0,4,35.823889,-80.253611
23,60.0,4,45.582778,-122.352222
24,3.0,3,51.783333,-0.783333


## Building the model

In [9]:
# Separating train & test
X = ufos[['Seconds','Latitude','Longitude']]
y = ufos['Country']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [10]:
#Scaling Data and training the model

model = make_pipeline(StandardScaler(), LogisticRegression())
model.fit(X_train, y_train)  # apply scaling on training data
pred= model.predict(X_test)


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


In [11]:
print(classification_report(y_test, pred))
print('Predicted labels: ', pred)
print('Accuracy: ', accuracy_score(y_test, pred))

              precision    recall  f1-score   support

           0       1.00      1.00      1.00        41
           1       0.83      0.40      0.54       288
           2       1.00      0.10      0.18        10
           3       0.93      1.00      0.96       134
           4       0.96      0.99      0.98      4700

    accuracy                           0.96      5173
   macro avg       0.94      0.70      0.73      5173
weighted avg       0.96      0.96      0.95      5173

Predicted labels:  [4 4 4 ... 4 4 1]
Accuracy:  0.960371157935434


'pickle' the model

In [12]:
import pickle

In [13]:
model_filename = 'ufo-model.pkl'
pickle.dump(model, open(model_filename,'wb'))

In [14]:
model = pickle.load(open('ufo-model.pkl','rb'))
print(model.predict([[50,44,-12]]))

[3]


### Building a Flask app
 Building a Flask app to call the model and return similar results, but in a more visually pleasing way.



In [15]:
cd web-app

c:\Users\diana\Desktop\Data science\Proyecto\Web App\web-app


In [16]:
pip install -r requirements.txt


Note: you may need to restart the kernel to use updated packages.
