## Importing the required packages

In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report, precision_recall_fscore_support

## Loading the dataset

In [2]:

import os, types
import pandas as pd
from botocore.client import Config
import ibm_boto3

def __iter__(self): return 0

# @hidden_cell
# The following code accesses a file in your IBM Cloud Object Storage. It includes your credentials.
# You might want to remove those credentials before you share the notebook.
cos_client = ibm_boto3.client(service_name='s3',
    ibm_api_key_id='clIomr-o3mi4uya6zQO6eZ5Vn2OI0_5pifA9ZHIK6KJZ',
    ibm_auth_endpoint="https://iam.cloud.ibm.com/oidc/token",
    config=Config(signature_version='oauth'),
    endpoint_url='https://s3.private.us.cloud-object-storage.appdomain.cloud')

bucket = 'flightdelay-donotdelete-pr-zwjtizzcthbqf1'
object_key = 'flightdata.csv'

body = cos_client.get_object(Bucket=bucket,Key=object_key)['Body']
# add missing __iter__ method, so pandas accepts body as file-like object
if not hasattr(body, "__iter__"): body.__iter__ = types.MethodType( __iter__, body )

df = pd.read_csv(body)
df.head()


Unnamed: 0,YEAR,QUARTER,MONTH,DAY_OF_MONTH,DAY_OF_WEEK,UNIQUE_CARRIER,TAIL_NUM,FL_NUM,ORIGIN_AIRPORT_ID,ORIGIN,...,CRS_ARR_TIME,ARR_TIME,ARR_DELAY,ARR_DEL15,CANCELLED,DIVERTED,CRS_ELAPSED_TIME,ACTUAL_ELAPSED_TIME,DISTANCE,Unnamed: 25
0,2016,1,1,1,5,DL,N836DN,1399,10397,ATL,...,2143,2102.0,-41.0,0.0,0.0,0.0,338.0,295.0,2182.0,
1,2016,1,1,1,5,DL,N964DN,1476,11433,DTW,...,1435,1439.0,4.0,0.0,0.0,0.0,110.0,115.0,528.0,
2,2016,1,1,1,5,DL,N813DN,1597,10397,ATL,...,1215,1142.0,-33.0,0.0,0.0,0.0,335.0,300.0,2182.0,
3,2016,1,1,1,5,DL,N587NW,1768,14747,SEA,...,1335,1345.0,10.0,0.0,0.0,0.0,196.0,205.0,1399.0,
4,2016,1,1,1,5,DL,N836DN,1823,14747,SEA,...,607,615.0,8.0,0.0,0.0,0.0,247.0,259.0,1927.0,


In [3]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 11231 entries, 0 to 11230
Data columns (total 26 columns):
 #   Column               Non-Null Count  Dtype  
---  ------               --------------  -----  
 0   YEAR                 11231 non-null  int64  
 1   QUARTER              11231 non-null  int64  
 2   MONTH                11231 non-null  int64  
 3   DAY_OF_MONTH         11231 non-null  int64  
 4   DAY_OF_WEEK          11231 non-null  int64  
 5   UNIQUE_CARRIER       11231 non-null  object 
 6   TAIL_NUM             11231 non-null  object 
 7   FL_NUM               11231 non-null  int64  
 8   ORIGIN_AIRPORT_ID    11231 non-null  int64  
 9   ORIGIN               11231 non-null  object 
 10  DEST_AIRPORT_ID      11231 non-null  int64  
 11  DEST                 11231 non-null  object 
 12  CRS_DEP_TIME         11231 non-null  int64  
 13  DEP_TIME             11124 non-null  float64
 14  DEP_DELAY            11124 non-null  float64
 15  DEP_DEL15            11124 non-null 

## Dropping unnecessary columns

In [4]:
df = df[['FL_NUM', 'MONTH', 'DAY_OF_MONTH', 'DAY_OF_WEEK', 'ORIGIN', 'DEST', 'DEP_DEL15', 'CRS_ARR_TIME', 'ARR_DEL15']]
df.head()

Unnamed: 0,FL_NUM,MONTH,DAY_OF_MONTH,DAY_OF_WEEK,ORIGIN,DEST,DEP_DEL15,CRS_ARR_TIME,ARR_DEL15
0,1399,1,1,5,ATL,SEA,0.0,2143,0.0
1,1476,1,1,5,DTW,MSP,0.0,1435,0.0
2,1597,1,1,5,ATL,SEA,0.0,1215,0.0
3,1768,1,1,5,SEA,MSP,0.0,1335,0.0
4,1823,1,1,5,SEA,DTW,0.0,607,0.0


## Handling Missing Values

### Checking for null values

In [5]:
df.isnull().any()

FL_NUM          False
MONTH           False
DAY_OF_MONTH    False
DAY_OF_WEEK     False
ORIGIN          False
DEST            False
DEP_DEL15        True
CRS_ARR_TIME    False
ARR_DEL15        True
dtype: bool

### Replacing null values

In [6]:
df.fillna(df['DEP_DEL15'].mode()[0], inplace=True)
df.fillna(df['ARR_DEL15'].mode()[0], inplace=True)

### Checking if the replacement is made

In [7]:
df.isnull().any()

FL_NUM          False
MONTH           False
DAY_OF_MONTH    False
DAY_OF_WEEK     False
ORIGIN          False
DEST            False
DEP_DEL15       False
CRS_ARR_TIME    False
ARR_DEL15       False
dtype: bool

## Encoding

### One Hot Encoding

In [8]:
df = pd.get_dummies(df, columns=['ORIGIN', 'DEST'])
df.head()

Unnamed: 0,FL_NUM,MONTH,DAY_OF_MONTH,DAY_OF_WEEK,DEP_DEL15,CRS_ARR_TIME,ARR_DEL15,ORIGIN_ATL,ORIGIN_DTW,ORIGIN_JFK,ORIGIN_MSP,ORIGIN_SEA,DEST_ATL,DEST_DTW,DEST_JFK,DEST_MSP,DEST_SEA
0,1399,1,1,5,0.0,2143,0.0,1,0,0,0,0,0,0,0,0,1
1,1476,1,1,5,0.0,1435,0.0,0,1,0,0,0,0,0,0,1,0
2,1597,1,1,5,0.0,1215,0.0,1,0,0,0,0,0,0,0,0,1
3,1768,1,1,5,0.0,1335,0.0,0,0,0,0,1,0,0,0,1,0
4,1823,1,1,5,0.0,607,0.0,0,0,0,0,1,0,1,0,0,0


In [9]:
df.columns

Index(['FL_NUM', 'MONTH', 'DAY_OF_MONTH', 'DAY_OF_WEEK', 'DEP_DEL15',
       'CRS_ARR_TIME', 'ARR_DEL15', 'ORIGIN_ATL', 'ORIGIN_DTW', 'ORIGIN_JFK',
       'ORIGIN_MSP', 'ORIGIN_SEA', 'DEST_ATL', 'DEST_DTW', 'DEST_JFK',
       'DEST_MSP', 'DEST_SEA'],
      dtype='object')

## Splitting dataset into Independent and Dependent Variables

In [10]:
X = df.drop(columns=['ARR_DEL15'])
Y = df[['ARR_DEL15']]

## Converting the Independent and Dependent Variables to 1D Arrays

In [11]:
X = X.values
Y = Y.values

## Splitting dataset into Train and Test datasets

In [12]:
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2)

In [13]:
X_train.shape, X_test.shape, Y_train.shape, Y_test.shape

((8984, 16), (2247, 16), (8984, 1), (2247, 1))

## Building the Logistic Regression Machine Learning Model

In [14]:
from sklearn.linear_model import LogisticRegression
model = LogisticRegression(max_iter=800)
model.fit(X_train, Y_train.ravel())

LogisticRegression(max_iter=800)

## Testing the Model

In [15]:
Y_pred_train = model.predict(X_train)
Y_pred_test = model.predict(X_test)

In [16]:
pd.DataFrame(Y_pred_train).value_counts()

0.0    7750
1.0    1234
dtype: int64

In [17]:
pd.DataFrame(Y_pred_test).value_counts()

0.0    1974
1.0     273
dtype: int64

## Evaluating the ML Model using Metrics

##### Classification Report

In [18]:
print(classification_report(Y_test, Y_pred_test))

              precision    recall  f1-score   support

         0.0       0.96      0.96      0.96      1987
         1.0       0.68      0.72      0.70       260

    accuracy                           0.93      2247
   macro avg       0.82      0.84      0.83      2247
weighted avg       0.93      0.93      0.93      2247



##### Accuracy, Precision, Recall, F1 Score

In [19]:
acc = accuracy_score(Y_test, Y_pred_test)
prec, rec, f1, sup = precision_recall_fscore_support(Y_test, Y_pred_test)
print('Accuracy Score =', acc)
print('Precision =', prec[0])
print('Recall =', rec[0])
print('F1 Score =', f1[0])

Accuracy Score = 0.9283489096573209
Precision = 0.9625126646403243
Recall = 0.9562154001006542
F1 Score = 0.9593536985609694


##### Checking for Overfitting and Underfitting

In [20]:
train_acc = accuracy_score(Y_train, Y_pred_train)
test_acc = accuracy_score(Y_test, Y_pred_test)
print('Training Accuracy =', train_acc)
print('Testing Accuracy =', test_acc)

Training Accuracy = 0.9190783615316117
Testing Accuracy = 0.9283489096573209


<p><b>There is no big variation in the training and testing accuracy. Therefore, the Logistic Regression model is not overfit or underfit.</b></p>

##### Confusion Matrix

In [21]:
pd.crosstab(Y_test.ravel(), Y_pred_test)

col_0,0.0,1.0
row_0,Unnamed: 1_level_1,Unnamed: 2_level_1
0.0,1900,87
1.0,74,186


## IBM Deployment

In [22]:
!pip install -U ibm-watson-machine-learning



In [23]:
from ibm_watson_machine_learning import APIClient
import json

### Authenticate and Set Space

In [24]:
wml_credentials = {
    "apikey":"I6vmW4nmyS35HD92jVtP81M_Ltw4dt5YoSFGBSpTvvSJ",
    "url":"https://us-south.ml.cloud.ibm.com"
}

In [25]:
wml_client = APIClient(wml_credentials)

In [26]:
wml_client.spaces.list()

Note: 'limit' is not provided. Only first 50 records will be displayed if the number of records exceed 50
------------------------------------  ------------  ------------------------
ID                                    NAME          CREATED
fe072024-e9ca-46c7-aa4f-461b5d334cfb  flight-delay  2022-11-17T15:04:27.398Z
84b0d9cd-0b5e-498f-97f0-80ce2781bd3c  B7-insurance  2022-10-19T13:45:09.759Z
4e12c9fc-81ad-491b-8d2e-211afa98c59a  iris-B7       2022-10-17T09:05:57.194Z
------------------------------------  ------------  ------------------------


In [27]:
SPACE_ID = "fe072024-e9ca-46c7-aa4f-461b5d334cfb"

In [28]:
wml_client.set.default_space(SPACE_ID)

'SUCCESS'

In [29]:
wml_client.software_specifications.list(500)

-------------------------------  ------------------------------------  ----
NAME                             ASSET_ID                              TYPE
default_py3.6                    0062b8c9-8b7d-44a0-a9b9-46c416adcbd9  base
kernel-spark3.2-scala2.12        020d69ce-7ac1-5e68-ac1a-31189867356a  base
pytorch-onnx_1.3-py3.7-edt       069ea134-3346-5748-b513-49120e15d288  base
scikit-learn_0.20-py3.6          09c5a1d0-9c1e-4473-a344-eb7b665ff687  base
spark-mllib_3.0-scala_2.12       09f4cff0-90a7-5899-b9ed-1ef348aebdee  base
pytorch-onnx_rt22.1-py3.9        0b848dd4-e681-5599-be41-b5f6fccc6471  base
ai-function_0.1-py3.6            0cdb0f1e-5376-4f4d-92dd-da3b69aa9bda  base
shiny-r3.6                       0e6e79df-875e-4f24-8ae9-62dcc2148306  base
tensorflow_2.4-py3.7-horovod     1092590a-307d-563d-9b62-4eb7d64b3f22  base
pytorch_1.1-py3.6                10ac12d6-6b30-4ccd-8392-3e922c096a92  base
tensorflow_1.15-py3.6-ddl        111e41b3-de2d-5422-a4d6-bf776828c4b7  base
autoai-kb_rt

### Save and Deploy the Model

In [30]:
import sklearn
sklearn.__version__

'1.0.2'

In [31]:
MODEL_NAME = 'flight-delay'
DEPLOYMENT_NAME = 'flight-delay'
DEMO_MODEL = model

##### Set Python Version

In [32]:
software_spec_uid = wml_client.software_specifications.get_id_by_name('runtime-22.1-py3.9')

##### Setup Model Meta

In [33]:
model_props = {
    wml_client.repository.ModelMetaNames.NAME: MODEL_NAME, 
    wml_client.repository.ModelMetaNames.TYPE: 'scikit-learn_1.0', 
    wml_client.repository.ModelMetaNames.SOFTWARE_SPEC_UID: software_spec_uid 
}

##### Save Model

In [34]:
model_details = wml_client.repository.store_model(
    model=DEMO_MODEL, 
    meta_props=model_props, 
    training_data=X_train, 
    training_target=Y_train
)

In [35]:
model_details

{'entity': {'hybrid_pipeline_software_specs': [],
  'label_column': 'l0',
  'schemas': {'input': [{'fields': [{'name': 'f0', 'type': 'float'},
      {'name': 'f1', 'type': 'float'},
      {'name': 'f2', 'type': 'float'},
      {'name': 'f3', 'type': 'float'},
      {'name': 'f4', 'type': 'float'},
      {'name': 'f5', 'type': 'float'},
      {'name': 'f6', 'type': 'float'},
      {'name': 'f7', 'type': 'float'},
      {'name': 'f8', 'type': 'float'},
      {'name': 'f9', 'type': 'float'},
      {'name': 'f10', 'type': 'float'},
      {'name': 'f11', 'type': 'float'},
      {'name': 'f12', 'type': 'float'},
      {'name': 'f13', 'type': 'float'},
      {'name': 'f14', 'type': 'float'},
      {'name': 'f15', 'type': 'float'}],
     'id': '1',
     'type': 'struct'}],
   'output': []},
  'software_spec': {'id': '12b83a17-24d8-5082-900f-0ab31fbfd3cb',
   'name': 'runtime-22.1-py3.9'},
  'type': 'scikit-learn_1.0'},
 'metadata': {'created_at': '2022-11-18T07:08:59.197Z',
  'id': '9e04976f-e

In [36]:
model_id = wml_client.repository.get_model_id(model_details)
model_id

'9e04976f-e706-45ca-b2f0-06104acde153'

##### Set Meta

In [37]:
deployment_props = {
    wml_client.deployments.ConfigurationMetaNames.NAME:DEPLOYMENT_NAME, 
    wml_client.deployments.ConfigurationMetaNames.ONLINE: {}
}

##### Deploy

In [38]:
deployment = wml_client.deployments.create(
    artifact_uid=model_id,
    meta_props=deployment_props
)



#######################################################################################

Synchronous deployment creation for uid: '9e04976f-e706-45ca-b2f0-06104acde153' started

#######################################################################################


initializing
Note: online_url is deprecated and will be removed in a future release. Use serving_urls instead.

ready


------------------------------------------------------------------------------------------------
Successfully finished deployment creation, deployment_uid='cec48201-70cc-4651-aa5d-7f49f99a586a'
------------------------------------------------------------------------------------------------


