In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

import statsmodels.formula.api as sm
import scipy.stats as stats
import pandas_profiling   #need to install using anaconda prompt (pip install pandas_profiling)

%matplotlib inline
# Notebook-wide plotting defaults: 10 x 7.5 inch figures with the axes grid on.
for option, value in (('figure.figsize', (10, 7.5)), ('axes.grid', True)):
    plt.rcParams[option] = value
# Make grayscale the default colormap.
plt.gray()

from matplotlib.backends.backend_pdf import PdfPages

from sklearn.model_selection import train_test_split
from sklearn import metrics
from sklearn.linear_model import LogisticRegression
from statsmodels.stats.outliers_influence import variance_inflation_factor
from patsy import dmatrices
import pickle

<Figure size 720x540 with 0 Axes>

In [2]:
# Read the CSV export into a DataFrame; the file is decoded as Latin-1.
csv_path = 'incchn1920.csv'
data = pd.read_csv(csv_path, encoding="latin1")
# Preview the first rows.
data.head()

Unnamed: 0,number,priority,caused_by,type,category,cmdb_ci,u_for_cab_review,u_ci_change_unavailable_i,u_impacted_application,u_change_timing,...,u_end_user_affected_i,u_critical_event,u_environment,SFA_date,assignment_group,short_description,urgency,u_implementation_status,start_date,end_date
0,INC11787642,P1S3,CHG0755981,Comprehensive,Configure,Virtual Machine test,No,No,RedHat Enterprise Linux,Emergency,...,No effect to End User,False,Development,00-01-1900 00:00,CNS Cloud Connect Level 4,P1S3 Emergency <DNS resolution issue on Infras...,2 - High,Successful,02-07-2019 18:00,03-07-2019 10:00
1,INC11788137,P2S2,CHG0755981,Comprehensive,Configure,Virtual Machine test,No,No,RedHat Enterprise Linux,Emergency,...,No effect to End User,False,Development,00-01-1900 00:00,CNS Cloud Connect Level 4,P1S3 Emergency <DNS resolution issue on Infras...,2 - High,Successful,02-07-2019 18:00,03-07-2019 10:00
2,INC11788797,P2S2,CHG0735342,,,,,,,,...,,,,,,,,,,
3,INC11790329,P1S3,CHG0716171,,,,,,,,...,,,,,,,,,,
4,INC11790388,P1S3,CHG0716171,,,,,,,,...,,,,,,,,,,


In [3]:
data.columns

Index(['number', 'priority', 'caused_by', 'type', 'category', 'cmdb_ci',
       'u_for_cab_review', 'u_ci_change_unavailable_i',
       'u_impacted_application', 'u_change_timing', 'subcategory',
       'u_kind_change_impl_i', 'u_where_impl_change_i',
       'u_affect_share_or_core_i', 'scope', 'u_countries', 'u_offices',
       'u_area', 'u_sub_area', 'u_portfolio', 'u_impact_not_performed_i',
       'u_end_user_affected_i', 'u_critical_event', 'u_environment',
       'SFA_date', 'assignment_group', 'short_description', 'urgency',
       'u_implementation_status', 'start_date', 'end_date'],
      dtype='object')

In [4]:
# Integer class label for each priority code; a priority missing from this
# table would be mapped to NaN by Series.map.
priority_to_label = {'P1S1': 1, 'P1S2': 2, 'P1S3': 3, 'P2S1': 4, 'P2S2': 5}
data['label'] = data['priority'].map(priority_to_label)

# The four categorical columns used as model inputs, in training order.
feature_columns = ['cmdb_ci', 'u_impacted_application', 'u_portfolio', 'u_environment']
X = data.loc[:, feature_columns]
y = data['label']

In [5]:
X

Unnamed: 0,cmdb_ci,u_impacted_application,u_portfolio,u_environment
0,Virtual Machine test,RedHat Enterprise Linux,IT Infrastructure,Development
1,Virtual Machine test,RedHat Enterprise Linux,IT Infrastructure,Development
2,,,,
3,,,,
4,,,,
...,...,...,...,...
2617,Load Balancer,Active Directory,IT Infrastructure,Production
2618,R2-NLLWR-NO02-00-RTR-01.EY.NET,,IT Infrastructure,Production
2619,SERVER NOT LISTED,,GHS,Production
2620,SERVER NOT LISTED,,GHS,Production


In [6]:
y

0       3
1       5
2       5
3       3
4       3
       ..
2617    2
2618    3
2619    3
2620    3
2621    3
Name: label, Length: 2622, dtype: int64

In [7]:
y.value_counts()

3    1584
4     778
5     207
2      37
1      16
Name: label, dtype: int64

In [8]:
y.shape

(2622,)

In [9]:
X.shape

(2622, 4)

In [10]:
from sklearn.preprocessing import OrdinalEncoder, StandardScaler,LabelEncoder

In [11]:
# Instantiate a scikit-learn LabelEncoder and bind it to `enc`.
# NOTE(review): a LabelEncoder instance only remembers the classes from its
# most recent fit, so it can describe a single column at a time.
enc= LabelEncoder()

In [12]:
# Encode all four categorical feature columns with ONE encoder fitted on the
# whole frame. Refitting a single LabelEncoder column by column leaves `enc`
# holding only the classes of the last column, so the object pickled later
# could not encode a complete feature row.
# OrdinalEncoder numbers each column's sorted unique strings 0..n-1, which is
# the same numbering LabelEncoder produced, so the encoded X is unchanged.
# Missing values become the string 'nan' through astype(str), as before.
enc = OrdinalEncoder(dtype=np.int64)
X = pd.DataFrame(
    enc.fit_transform(X.astype(str)),
    columns=X.columns,
    index=X.index,
)

In [13]:
X

Unnamed: 0,cmdb_ci,u_impacted_application,u_portfolio,u_environment
0,271,142,20,0
1,271,142,20,0
2,284,188,33,2
3,284,188,33,2
4,284,188,33,2
...,...,...,...,...
2617,124,4,20,1
2618,198,188,20,1
2619,228,188,15,1
2620,228,188,15,1


In [14]:
from flask import Flask,render_template,url_for,request
from gevent.pywsgi import WSGIServer
import pandas as pd 
import pickle

In [15]:
# Persist the fitted encoder. The with-block guarantees the file is flushed
# and closed, including when pickling raises.
with open('tranform.pkl', 'wb') as encoder_file:
    pickle.dump(enc, encoder_file)

In [16]:
from sklearn.model_selection import train_test_split

# Hold out 33% of the rows for evaluation; the fixed seed keeps the split
# identical between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.33,
    random_state=42,
)

In [17]:
from sklearn.ensemble import RandomForestClassifier

In [18]:
# Seed the forest so the fitted model, and the accuracy displayed below, are
# reproducible on Restart & Run All (an unseeded forest differs on every run).
clf = RandomForestClassifier(max_depth=27, random_state=42)
clf.fit(X_train, y_train)
# Mean accuracy on the held-out split.
clf.score(X_test, y_test)

0.6212471131639723

In [19]:
# Persist the trained classifier. The with-block closes the file handle
# deterministically instead of leaving it to the garbage collector.
filename = 'nlp_model.pkl'
with open(filename, 'wb') as model_file:
    pickle.dump(clf, model_file)

In [20]:
# Reload the classifier and the encoder from disk, closing each file as soon
# as it has been read.
# NOTE: unpickling can execute arbitrary code; only load pickle files that
# this notebook wrote itself.
filename = 'nlp_model.pkl'
with open(filename, 'rb') as model_file:
    clf = pickle.load(model_file)
with open('tranform.pkl', 'rb') as encoder_file:
    enc = pickle.load(encoder_file)

In [21]:
# Create the Flask application object that the route decorators below attach to.
app = Flask(__name__)

In [22]:
import pickle

# Write the classifier back to `filename`. Using a with-block means the file
# is closed even if pickle.dump raises, which the manual open()/close() pair
# did not guarantee.
with open(filename, "wb") as pickle_out:
    pickle.dump(clf, pickle_out)

In [23]:
# Sanity check: classify one hand-written row of integer feature codes.
# The four values follow the column order of X
# (cmdb_ci, u_impacted_application, u_portfolio, u_environment).
clf.predict([[271,142,15,2]])

array([3], dtype=int64)

In [24]:
@app.route('/')
def home():
    """Serve the landing page rendered from the 'home.html' template."""
    landing_page = render_template('home.html')
    return landing_page

In [25]:
# Form fields in the exact column order the classifier was trained on
# (the column order of X). Feeding them in any other order silently swaps features.
_FEATURE_COLUMNS = ('cmdb_ci', 'u_impacted_application', 'u_portfolio', 'u_environment')
# Lazily filled cache: column name -> {category string: integer code}.
_CATEGORY_CODES = {}


def _category_codes(column):
    """Return the {category string: integer code} table for one feature column.

    Codes are the positions of the sorted unique string values of
    ``data[column]``, which is the numbering the label encoding assigned to
    that column when the training matrix was built.
    """
    if column not in _CATEGORY_CODES:
        categories = sorted(set(data[column].astype(str)))
        _CATEGORY_CODES[column] = {name: code for code, name in enumerate(categories)}
    return _CATEGORY_CODES[column]


@app.route('/predict',methods=['POST'])
def predict():
    """Encode the submitted form fields and render the predicted label.

    Reads 'cmdb_ci', 'u_impacted_application', 'u_portfolio' and
    'u_environment' from the POSTed form, converts each raw string to the
    integer code used during training, and passes the row to ``clf`` in
    training column order.

    Returns:
        The rendered 'result.html' template with ``prediction`` set to the
        array returned by ``clf.predict``, or a plain-text message with
        HTTP status 400 when a field holds a value that never occurred in
        the training data.
    """
    codes = []
    for column in _FEATURE_COLUMNS:
        raw_value = request.form[column]
        if not raw_value.strip():
            # Missing values were encoded as the string 'nan' during training
            # (astype(str) on NaN), so a blank field maps to that category.
            raw_value = 'nan'
        table = _category_codes(column)
        if raw_value not in table:
            # The submitted text is deliberately not echoed back into the response.
            return "Unrecognised value for field '{}'".format(column), 400
        codes.append(table[raw_value])
    my_prediction = clf.predict([codes])
    return render_template('result.html',prediction = my_prediction)

In [None]:
if __name__ == '__main__':
    # Start Flask's built-in server with debug mode on and the auto-reloader off.
    # NOTE(review): debug mode enables the interactive debugger; confirm this
    # server is never reachable from outside the local machine.
    server_options = {'debug': True, 'use_reloader': False}
    app.run(**server_options)
   

 * Serving Flask app "__main__" (lazy loading)
 * Environment: production
   Use a production WSGI server instead.
 * Debug mode: on


 * Running on http://127.0.0.1:5000/ (Press CTRL+C to quit)
