# Load Lib

In [1]:
import numpy as np
import pandas as pd
import os 
import re # regex library
# Read the Data
# Train, Test Split

from sklearn.model_selection import train_test_split
# Training a Neural Network Pipeline
from sklearn.pipeline import Pipeline
from sklearn.linear_model import LogisticRegression
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics import accuracy_score, classification_report
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import cross_val_score
from joblib import dump

In [2]:
!python --version 

Python 3.8.8


# Config 

In [3]:
# -*- coding: utf-8 -*-
"""Model config in json format"""
cfg = {
    "data": {
        ##alldata
        "path": "C:/Users/rzouga/Desktop/ALLINHERE/ALLINHERE/Deploy_Spam_Detection/data/spam_data.csv"
        # small sample:
       # "path": "C:/Users/rzouga/Desktop/ALLINHERE/ALLINHERE/FraudDetection/X_train_transactions_train.csv"
    },
    # "data_test": {
    #   "path": "../input/ventilator-pressure-prediction/test.csv"
    # },
    # "data_submission": {
    #   "path": "../input/ventilator-pressure-prediction/test.csv"
    # },
    "train": {
        'fit_params': {'early_stopping_rounds': 100, 'verbose': 55000},
        'n_fold': 5,
        'seeds': [2021],
        'target_col': "Fraud",
        'debug': False

    },
    "model": {'n_estimators': 11932, 
                    'max_depth': 16, 
                    'learning_rate': 0.005352340588475586,
                    'lambda_l1': 1.4243404105489683e-06,
                    'lambda_l2': 0.04777178032735788,
                    'num_leaves': 141, 
                    'feature_fraction': 0.6657626611307914, 
                    'bagging_fraction': 0.9115997498937961,
                    'bagging_freq': 1,
                    'min_child_samples': 51,
                     "objective": "binary",
                     #"metric": "binary_logloss",
                     "verbosity": -1,
                     "boosting_type": "gbdt",
                     #"random_state": 228,
                     "metric": "auc",
                     #"device": "gpu",
                     'tree_method': "gpu_hist"
                    }
}

## logs

In [7]:
class Logger:
    """save log"""
    def __init__(self, path):
        self.general_logger = logging.getLogger(path)
        stream_handler = logging.StreamHandler()
        file_general_handler = logging.FileHandler(os.path.join(path, 'Experiment.log'))
        if len(self.general_logger.handlers) == 0:
            self.general_logger.addHandler(stream_handler)
            self.general_logger.addHandler(file_general_handler)
            self.general_logger.setLevel(logging.INFO)

    def info(self, message):
        # display time
        self.general_logger.info('[{}] - {}'.format(self.now_string(), message))

    @staticmethod
    def now_string():
        return str(datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S'))
    
    
class Util:
    """save & load"""
    @classmethod
    def dump(cls, value, path):
        joblib.dump(value, path, compress=True)

    @classmethod
    def load(cls, path):
        return joblib.load(path)
        
class HorizontalDisplay:
    """display dataframe"""
    def __init__(self, *args):
        self.args = args

    def _repr_html_(self):
        template = '<div style="float: left; padding: 10px;">{0}</div>'
        return "\n".join(template.format(arg._repr_html_())
                         for arg in self.args)

In [8]:
# -*- coding: utf-8 -*-
"""Config class"""
import json
from types import SimpleNamespace
class Config:
    name_v1 = "lgb baseline"
    """Config class which contains data, train and model hyperparameters"""
    def __init__(self, data, train, model):
        self.data = data
        self.train = train
        self.model = model
    @classmethod
    def from_json(cls, cfg):
        """Creates config from json"""
        params = json.loads(json.dumps(cfg), object_hook=lambda d: SimpleNamespace(**d))
        return cls(params.data, params.train, params.model)

class HelperObject(object):
    """Helper class to convert json into Python object"""
    def __init__(self, dict_):
        self.__dict__.update(dict_)

# Load Data

In [9]:
# -*- coding: utf-8 -*-
"""Data Loader"""
class DataLoader:
    """Data Loader class"""
    @staticmethod
    def load_data(data_config):
        """Loads dataset from path"""
        return pd.read_csv(data_config.path)
    
%time
if __name__ == "__main__":
    train = DataLoader().load_data(Config.from_json(cfg).data)
    print(train.head())
    print('shape of data {}'.format(train.shape))

Wall time: 0 ns
  Category                                            Message
0      ham  Go until jurong point, crazy.. Available only ...
1      ham                      Ok lar... Joking wif u oni...
2     spam  Free entry in 2 a wkly comp to win FA Cup fina...
3      ham  U dun say so early hor... U c already then say...
4      ham  Nah I don't think he goes to usf, he lives aro...
shape of data (5572, 2)


In [12]:
current_path = os.getcwd()
current_path

'C:\\Users\\rzouga\\Desktop\\ALLINHERE\\ALLINHERE\\Deploy_Spam_Detection\\notebook'

# Preprocess Data: 

In [13]:
dataloder=DataLoader()
data = dataloder.load_data(Config.from_json(cfg).data)
data.head()

Unnamed: 0,Category,Message
0,ham,"Go until jurong point, crazy.. Available only ..."
1,ham,Ok lar... Joking wif u oni...
2,spam,Free entry in 2 a wkly comp to win FA Cup fina...
3,ham,U dun say so early hor... U c already then say...
4,ham,"Nah I don't think he goes to usf, he lives aro..."


In [14]:
data.shape

(5572, 2)

# Preprocessing 

In [16]:
#data = pd.read_csv('./data/spam_data.csv')
# Text Preprocessing
def preprocessor(text):
    text = re.sub('<[^>]*>', '', text) # Effectively removes HTML markup tags
    emoticons = re.findall('(?::|;|=)(?:-)?(?:\)|\(|D|P)', text)
    text = re.sub('[\W]+', ' ', text.lower()) + ' '.join(emoticons).replace('-', '')
    return text
# Define X and y 
X = data['Message'].apply(preprocessor)
y = data['Category']

# Train Test Split 

In [18]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)


In [21]:
X_test.iloc[0]

'squeeeeeze this is christmas hug if u lik my frndshp den hug me back if u get 3 u r cute 6 u r luvd 9 u r so lucky none people hate u :) ;)'

In [30]:
y_test.iloc[0]

'ham'

In [32]:
y.unique()

array(['ham', 'spam'], dtype=object)

# Train Data

In [19]:
tfidf = TfidfVectorizer(strip_accents=None, lowercase=False, 
                        max_features=700, 
                        ngram_range=(1,1))
neural_net_pipeline = Pipeline([('vectorizer', tfidf), 
                                ('nn', MLPClassifier(hidden_layer_sizes=(700, 700)))])
neural_net_pipeline.fit(X_train, y_train)
# Testing the Pipeline
y_pred = neural_net_pipeline.predict(X_test)
print(classification_report(y_test, y_pred))
print('Accuracy: {} %'.format(100 * accuracy_score(y_test, y_pred)))
# Saving the Pipeline
dump(neural_net_pipeline, 'C:/Users/rzouga/Desktop/ALLINHERE/ALLINHERE/Deploy_Spam_Detection/models/spam_classifier.joblib')

              precision    recall  f1-score   support

         ham       0.99      0.99      0.99      1448
        spam       0.96      0.92      0.94       224

    accuracy                           0.98      1672
   macro avg       0.97      0.96      0.96      1672
weighted avg       0.98      0.98      0.98      1672

Accuracy: 98.38516746411483 %


['C:/Users/rzouga/Desktop/ALLINHERE/ALLINHERE/Deploy_Spam_Detection/models/spam_classifier.joblib']

# Predict :

In [4]:
#load model data
import joblib
#f = "C:/Users/rzouga/Desktop/ALLINHERE/ALLINHERE/FraudDetection/DeployPipeComplet/models/model_test.joblib"
f = "C:/Users/rzouga/Desktop/ALLINHERE/ALLINHERE/Deploy_Spam_Detection/models/spam_classifier.joblib"
model = joblib.load(f)
item={}
item={"message":'squeeeeeze this is christmas hug if u lik my frndshp den hug me back if u get 3 u r cute 6 u r luvd 9 u r so lucky none people hate u :) ;)'
     }
df = pd.json_normalize(item)
df2 =pd.DataFrame([item])
prediction = model.predict_proba(df2)
prediction[0][1]

0.21986705334812853

In [5]:
df

Unnamed: 0,message
0,squeeeeeze this is christmas hug if u lik my f...


In [6]:
prediction = model.predict(df)
prediction[0]

'ham'

In [7]:
prediction = model.predict_proba(df)
prediction_final=model.predict(df)
h={"proba": prediction[0][1], "result" :prediction_final[0]}
print(h)

{'proba': 0.21986705334812853, 'result': 'ham'}


In [8]:
# First, we will need to import the library and initialize the main application object:
import joblib
import uvicorn
from fastapi import FastAPI,Request, File, UploadFile, HTTPException
from pydantic import BaseModel
import pandas as pd
import numpy as np 
import nest_asyncio
from typing import Any, Dict,List
        
## API INSTANTIATION
## ----------------------------------------------------------------
       
app = FastAPI(
    title="Spam Detection API",
    description="A simple API that use Ml model to predict Spam ",
    version="0.1",
)
# Creating the data model for data validation
class ClientData(BaseModel):
    message: str
#load model data
f = "C:/Users/rzouga/Desktop/ALLINHERE/ALLINHERE/Deploy_Spam_Detection/models/spam_classifier.joblib"
#f = "C:/Users/rzouga/Desktop/ALLINHERE/ALLINHERE/FraudDetection/DeployPipeComplet/models/pipeline_model_lgbm_final.joblib"
model = joblib.load(f)
    
## API ENDPOINTS
## ----------------------------------------------------------------
## API ENDPOINTS
## ----------------------------------------------------------------

# Preprocess Heleper 
def preprocessor(text):
    text = re.sub('<[^>]*>', '', text) # Effectively removes HTML markup tags
    emoticons = re.findall('(?::|;|=)(?:-)?(?:\)|\(|D|P)', text)
    text = re.sub('[\W]+', ' ', text.lower()) + ' '.join(emoticons).replace('-', '')
    return text
# Predict Function 
def classify_message(model, message):

	message = preprocessor(message)
	label = model.predict([message])[0]
	spam_prob = model.predict_proba([message])

	return {'label': label, 'spam_probability': spam_prob[0][1]}

##################
@app.get('/')
def index():
  '''
  This is a first docstring.
  '''
  return {'message': 'This is a Fraud  Classification API!'}

# Tester
@app.get('/ping')
def ping():
  '''
  This is a first docstring.
  '''
  return ('pong', 200)
# Defining the prediction endpoint without data validation
@app.post('/basic_predict_spam')
async def basic_predict(request: Request):
    '''
    This is a first docstring.
    '''
    # Getting the JSON from the body of the request
    messsage = await request.json()
    return classify_message(model, message)

# We now define the function that will be executed for each URL request and return the value:
@app.post("/predict-spam")
async  def predict_fraud(item :ClientData):
    """
    A simple function that receive a client data and predict Spam.
    :param client_data:
    :return: prediction, probabilities
    """
    # perform prediction
    #df =pd.DataFrame([item])
    #h=item.dict()
    return classify_message(model, str(item))
    
    # Create the POST endpoint with path '/predict'
@app.post("/predict_csv")
async def create_upload_file(file: UploadFile = File(...)):
    # Handle the file only if it is a CSV
    if file.filename.endswith(".csv"):
        # Create a temporary file with the same name as the uploaded 
        # CSV file to load the data into a pandas Dataframe
        with open(file.filename, "wb")as f:
            f.write(file.file.read())
        data = pd.read_csv(file.filename)
        os.remove(file.filename)
        # Return a JSON object containing the model predictions
        return {
            "predictions": model.predict(data)
        }    
    else:
        # Raise a HTTP 400 Exception, indicating Bad Request 
        # (you can learn more about HTTP response status codes here)
        raise HTTPException(status_code=400, detail="Invalid file format. Only CSV Files accepted.")
nest_asyncio.apply()
uvicorn.run(app, port=4000)
# uvicorn app:app --reload

INFO:     Started server process [7612]
INFO:     Waiting for application startup.
INFO:     Application startup complete.
INFO:     Uvicorn running on http://127.0.0.1:4000 (Press CTRL+C to quit)


INFO:     127.0.0.1:50080 - "GET / HTTP/1.1" 200 OK
INFO:     127.0.0.1:50081 - "GET /docs HTTP/1.1" 200 OK
INFO:     127.0.0.1:50081 - "GET /openapi.json HTTP/1.1" 200 OK
INFO:     127.0.0.1:50692 - "POST /predict-fraud HTTP/1.1" 404 Not Found
INFO:     127.0.0.1:50695 - "POST /docs HTTP/1.1" 405 Method Not Allowed
INFO:     127.0.0.1:50703 - "GET /predict_fraud_predict_spam_post HTTP/1.1" 404 Not Found
INFO:     127.0.0.1:50709 - "POST /basic_predict_basic_predict_spam_post HTTP/1.1" 404 Not Found
INFO:     127.0.0.1:50711 - "POST /basic_predict_basic_predict_spam_post HTTP/1.1" 404 Not Found
INFO:     127.0.0.1:50715 - "GET /docs HTTP/1.1" 200 OK
INFO:     127.0.0.1:50715 - "GET /openapi.json HTTP/1.1" 200 OK
INFO:     127.0.0.1:50717 - "POST /predict-spam HTTP/1.1" 422 Unprocessable Entity
INFO:     127.0.0.1:50718 - "POST /predict-spam HTTP/1.1" 200 OK
INFO:     127.0.0.1:50719 - "POST /basic_predict_basic_predict_spam_post HTTP/1.1" 404 Not Found
INFO:     127.0.0.1:50721 - "POST

INFO:     Shutting down
INFO:     Waiting for application shutdown.
INFO:     Application shutdown complete.
INFO:     Finished server process [7612]


In [9]:
import session_info
session_info.show()

In [3]:
!pip install pipreqs

Collecting pipreqs
  Downloading pipreqs-0.4.11-py2.py3-none-any.whl (32 kB)
Collecting docopt
  Downloading docopt-0.6.2.tar.gz (25 kB)
Collecting yarg
  Downloading yarg-0.1.9-py2.py3-none-any.whl (19 kB)
Building wheels for collected packages: docopt
  Building wheel for docopt (setup.py): started
  Building wheel for docopt (setup.py): finished with status 'done'
  Created wheel for docopt: filename=docopt-0.6.2-py2.py3-none-any.whl size=13705 sha256=39ea6745695769f61d1e1374ccd3453a35565ea3935cc5143e71befa5147347f
  Stored in directory: c:\users\rzouga\appdata\local\pip\cache\wheels\56\ea\58\ead137b087d9e326852a851351d1debf4ada529b6ac0ec4e8c
Successfully built docopt
Installing collected packages: yarg, docopt, pipreqs
Successfully installed docopt-0.6.2 pipreqs-0.4.11 yarg-0.1.9


In [None]:
!pipreqs

In [1]:
# First, we will need to import the library and initialize the main application object:
import os
import joblib
import uvicorn
from fastapi import FastAPI, Request, File, UploadFile, HTTPException
from pydantic import BaseModel
# import nest_asyncio
#from utils.pipeline import *
#from utils.preparation import *

# from typing import Any, Dict,List,Enum
# import numpy as np  

"""
1. Set up the FastAPI application
2. Load the model(s) into the application
3. Create required API endpoint(s) for users to submit data:
   - These could be CSV file(s), image(s), JSON object(s), etc.
   - Handle incoming data appropriately
4. Use the indended model to predict the result(s) on the data submitted
5. If successful, return the predictions, else raise an error  

 """

## API INSTANTIATION
## ----------------------------------------------------------------

app = FastAPI(
    title="Spam Detection API",
    description="A simple API that use Ml model to predict Spam ",
    version="0.1",
)

# Preprocess Heleper 
def preprocessor(text):
    text = re.sub('<[^>]*>', '', text) # Effectively removes HTML markup tags
    emoticons = re.findall('(?::|;|=)(?:-)?(?:\)|\(|D|P)', text)
    text = re.sub('[\W]+', ' ', text.lower()) + ' '.join(emoticons).replace('-', '')
    return text
# Predict Function 
def classify_message(model, message):

	message = preprocessor(message)
	label = model.predict([message])[0]
	spam_prob = model.predict_proba([message])

	return {'label': label, 'spam_probability': spam_prob[0][1]}

# Creating the data model for data validation
class ClientData(BaseModel):
    message: str

# Load  the model  a serialized .joblib file
#joblib_filename = "models/pipeline_model_lgbm_final.joblib"
#model = joblib.load(joblib_filename)
with open('models/spam_classifier.joblib', 'rb') as joblib_filename:
    model = joblib.load(joblib_filename)
   

## API ENDPOINTS
## ----------------------------------------------------------------

##################
@app.get('/')
def index():
    """
  This is a first docstring.
  """
    return {'message': 'This is a Fraud  Classification API!'}


# Tester
@app.get('/ping')
def ping():
    '''
  This is a first docstring.
  '''
    return ('pong', 200)


# Defining the prediction endpoint without data validation
@app.post('/basic_predict_spam')
async def basic_predict(request: Request):
    '''
    This is a first docstring.
    '''
    # Getting the JSON from the body of the request
    messsage = await request.json()
    return classify_message(model, message)


# We now define the function that will be executed for each URL request and return the value:
@app.post("/predict-spam")
async  def predict_fraud(item :ClientData):
    """
    A simple function that receive a client data and predict Spam.
    :param client_data:
    :return: prediction, probabilities
    """
    # perform prediction
    #df =pd.DataFrame([item])
    #h=item.dict()
    return classify_message(model, str(item))
	
# Create the POST endpoint with path '/predict_csv'
@app.post("/predict_csv")
async def create_upload_file(file: UploadFile = File(...)):
    # Handle the file only if it is a CSV
    if file.filename.endswith(".csv"):
        # Create a temporary file with the same name as the uploaded 
        # CSV file to load the data into a pandas Dataframe
        with open(file.filename, "wb")as f:
            f.write(file.file.read())
        data = pd.read_csv(file.filename)
        os.remove(file.filename)
        # Return a JSON object containing the model predictions
        return {
            "predections": model.predict(data)
        }    
    else:
        # Raise a HTTP 400 Exception, indicating Bad Request 
        # (you can learn more about HTTP response status codes here)
        raise HTTPException(status_code=400, detail="Invalid file format. Only CSV Files accepted.")


nest_asyncio.apply()
uvicorn.run(app, port=4000)
# uvicorn app:app --reload

ModuleNotFoundError: No module named 'utils'