IMPORTS

In [None]:
import numpy as np
import pandas as pd
import joblib

In [None]:
from matplotlib import pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score

DATAFRAME

In [None]:
# Load the training table from CSV.
# NOTE(review): this is an absolute path on the author's machine; the cell
# fails anywhere else — consider a configurable data directory.
df = pd.read_csv('/home/mamisoa/ITU/L3/Mr_Tsinjo/Sardinas_patterson/python/codes.csv')
# Drop the 'language' column so it is not part of the columns sliced into
# features/label further down.
df = df.drop(['language'], axis=1)
# Bare last expression: the frame is rendered as the cell output.
df

TRAINING WITH RANDOM FOREST

In [None]:
# Feature matrix: every column except the last one, as a plain NumPy array
# (so the model is fitted without column names).
X = df.iloc[:, 0:-1].values
# Target vector: the last column of the frame.
Y = df.iloc[:, -1].values

In [None]:
# Hold out 25% of the rows for evaluation; random_state=0 makes the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size = 0.25,random_state = 0)

In [None]:
# Random forest with default hyper-parameters; random_state=0 fixes the seed.
clf = RandomForestClassifier(random_state=0)
# Fit on the training split (fit returns the estimator itself).
clf = clf.fit(X_train, y_train)
# Predictions on the held-out split, evaluated in the following cells.
y_pred = clf.predict(X_test)

In [None]:
# Confusion matrix of true labels (first argument) against predicted labels.
cm = confusion_matrix(y_test, y_pred)
print(cm)

In [None]:
# Accuracy on the held-out split, shown as the cell output.
accuracy_score(y_test, y_pred)

MODEL EXPORT AND IMPORT

In [None]:
# Persist the fitted classifier to disk.
# NOTE(review): absolute, machine-specific path; re-running this cell overwrites the file.
joblib.dump(clf , "/home/mamisoa/ITU/L3/Mr_Tsinjo/Sardinas_patterson/modele/code.joblib")

In [None]:
# Reload the persisted classifier; `modele` is the object the Flask handler
# below uses for predictions.
# NOTE(review): joblib files are pickle-based — only load files you trust.
modele = joblib.load("/home/mamisoa/ITU/L3/Mr_Tsinjo/Sardinas_patterson/modele/code.joblib")
# Alternative model file, currently disabled:
# modele = joblib.load("/home/mamisoa/ITU/L3/Mr_Tsinjo/Sardinas_patterson/modele/code_ok_1.joblib")

IMPLEMENTATION OF THE SARDINAS-PATTERSON ALGORITHM

In [None]:
class SardinasPatterson:
    """Sardinas-Patterson test on a finite list of words.

    Attributes:
        language: list of words (strings) under test.
        step: list of the successive sets L1, L2, ... computed by the most
            recent ``is_code`` call; when that call fails, the language
            itself is appended as the last entry.
        epsilon: the last word whose removal as a prefix produced the empty
            string in ``divide``.
    """

    def __init__(self, language):
        """Store ``language`` and start with empty per-instance state."""
        self.language = language
        # State lives on the instance: a class-level list would be shared by
        # every instance, so building a new object would wipe the steps of
        # all existing ones.
        self.step = []
        self.epsilon = ""

    def divide(self, L1, L2):
        """Return the set of suffixes ``w`` such that ``u + w`` is in ``L2``
        for some ``u`` in ``L1`` (left quotient of ``L2`` by ``L1``).

        Side effect: whenever a suffix is empty, ``self.epsilon`` is set to
        the prefix ``u`` that produced it.
        """
        res = set()
        for elt in L1:
            for item in L2:
                if item.startswith(elt):
                    suffix = item[len(elt):]
                    if suffix == '':
                        self.epsilon = elt
                    res.add(suffix)
        return res

    def get_L1(self):
        """Return the quotient of the language by itself, without ''."""
        res = self.divide(self.language, self.language)
        # discard (not remove): an empty language yields an empty quotient,
        # and remove('') would raise KeyError in that case.
        res.discard('')
        return res

    def get_L_n_plus_1(self, Ln):
        """Return the next set: quotient of the language by ``Ln`` united
        with the quotient of ``Ln`` by the language."""
        res = self.divide(Ln, self.language)
        return res.union(self.divide(self.language, Ln))

    def get_mot(self, lst, mot):
        """Return the first ``elt`` of ``lst`` such that ``elt + mot`` is a
        word of the language; return ``mot`` itself when there is none."""
        for item in self.language:
            for elt in lst:
                if item == (elt + mot):
                    return elt
        return mot

    def get_contre_exemple(self):
        """Build a string from ``self.epsilon`` and the recorded steps.

        Presumably this is the ambiguous word (counter-example) found by a
        failed ``is_code`` call; must be called right after ``is_code``.
        """
        res = []
        mot = self.epsilon
        temp = self.step
        res.append(mot)
        for i in range(len(temp) - 2, -1, -1):
            # NOTE(review): for i == 0 this reads temp[-1], i.e. the last
            # recorded entry (the language itself after a failed is_code).
            # Confirm that this wrap-around is intended.
            mot = self.get_mot(temp[i - 1], mot)
            res.append(mot)
        return ''.join(res)

    def make_code(self):
        """Try to turn the language into a code by removing one or two words.

        For each index ``i`` the language without word ``i`` is tested; if it
        is not a code, every language obtained by removing one more word is
        tested as well. The first candidate accepted by ``is_code`` is
        stored in ``self.language`` and returned.

        Returns:
            The reduced language (a list), or an empty list when no
            candidate is a code; in that case ``self.language`` is restored
            to its original content.
        """
        initial = list(self.language)
        for i in range(len(initial)):
            without_one = initial[:i] + initial[i + 1:]
            self.language = without_one
            if self.is_code()[0]:
                return self.language
            for j in range(len(without_one)):
                # Build each candidate from a fresh slice. Popping from the
                # list being iterated removed words cumulatively and ran
                # past the end of the shrinking list.
                self.language = without_one[:j] + without_one[j + 1:]
                if self.is_code()[0]:
                    return self.language
        self.language = initial
        return list()

    def is_code(self):
        """Run the Sardinas-Patterson iteration on ``self.language``.

        Returns:
            A ``(bool, str)`` tuple: whether the language is a code, and a
            message naming the index of the set at which the iteration
            stopped.
        """
        # Start every run from a clean history: stale sets from a previous
        # run would trigger the cycle check below and report a false "code".
        self.step = []
        if len(self.language) == 1:
            return True, 'It stops at L1'
        temp = self.get_L1()
        count = 1
        self.step.append(temp)
        while '' not in temp:
            temp = self.get_L_n_plus_1(temp)
            # A set seen before means the sequence cycles without ever
            # producing the empty word.
            if temp in self.step:
                return True, 'It stops at L' + str(count)
            self.step.append(temp)
            count += 1
        # The language is recorded last; get_contre_exemple reads it back.
        self.step.append(self.language)
        return False, 'It stops at L' + str(count)

FEATURE EXTRACTION FOR THE SUBMITTED CODE

In [None]:
import math
from nltk.metrics.distance import edit_distance

def get_average_word_length(language):
    """Return the mean length of the words in ``language``, rounded to 2 decimals.

    Raises ZeroDivisionError when ``language`` is empty.
    """
    total_length = sum(len(word) for word in language)
    mean_length = total_length / len(language)
    return round(mean_length, 2)

def get_percent_of_elt(language, letter):
    """Return the occurrences of ``letter`` over all words, each word's count
    being divided by the number of words before it is accumulated.

    Returns 0 for an empty ``language``.
    """
    share = 0
    for word in language:
        # Divide per word (not once at the end) to keep the accumulation identical.
        share = share + word.count(letter) / len(language)
    return share

def get_number_of_elt(language, letter):
    """Return the total number of (non-overlapping) occurrences of ``letter``
    summed over every word of ``language``."""
    return sum(word.count(letter) for word in language)

def get_frequency_of_number(language, letter):
    """Return the per-word occurrences of ``letter``, each divided by the
    average word length of ``language``, accumulated over all words.

    Raises ZeroDivisionError when the language is empty, or when the average
    word length is 0 and the language is non-empty.
    """
    average_length = get_average_word_length(language)
    frequency = 0
    for word in language:
        frequency = frequency + word.count(letter) / average_length
    return frequency

def letter_probability(string, letter):
    """Return the occurrences of ``letter`` in ``string`` divided by the
    length of ``string`` (ZeroDivisionError for an empty string)."""
    occurrences = string.count(letter)
    return occurrences / len(string)
    

def word_entropy(string):
    """Return the base-2 entropy of ``string`` computed from the relative
    frequencies of the symbols '0' and '1'.

    A symbol with probability 0 contributes 0 instead of evaluating log(0).
    """
    p_zero = letter_probability(string, '0')
    p_one = letter_probability(string, '1')
    log_zero = math.log(p_zero, 2) if p_zero != 0 else 0
    log_one = math.log(p_one, 2) if p_one != 0 else 0
    weighted_sum = (p_zero * log_zero) + (p_one * log_one)
    return -1 * weighted_sum

def entropy(language):
    """Return the mean of ``word_entropy`` over the words of ``language``
    (ZeroDivisionError for an empty language)."""
    accumulated = 0
    for word in language:
        accumulated = accumulated + word_entropy(word)
    return accumulated / len(language)


def calculate_nc(language):
    """Accumulate, for every word, the '0' and '1' probabilities of that word,
    each multiplied by the number of words in ``language``."""
    word_count = len(language)
    total = 0
    for word in language:
        zero_share = letter_probability(word, '0')
        one_share = letter_probability(word, '1')
        total = total + ((zero_share * word_count) + (one_share * word_count))
    return total

def count_word_composed_of_letter(language, letter):
    """Count the words whose number of ``letter`` occurrences equals their
    length (an empty word therefore always counts)."""
    return sum(1 for word in language if word.count(letter) == len(word))

def count_word_composed_of_mixed_letter(language):
    """Count the words that contain the substring '01' or the substring '10'."""
    return sum(1 for word in language if '01' in word or '10' in word)

def levenshtein_average_interval(language):
    """Return the mean ``edit_distance`` over every unordered pair of words
    (by position) in ``language``; 0 when there is no pair."""
    distance_sum = 0
    pair_count = 0
    for position, left in enumerate(language):
        # Only pair each word with the ones that come after it.
        for right in language[position + 1:]:
            distance_sum += edit_distance(left, right)
            pair_count += 1
    if pair_count > 0:
        return distance_sum / pair_count
    return 0


def calcul_value(language):
    """Return a weighted word count divided by 5: all-'0' words and all-'1'
    words weigh 2 each, words containing '01' or '10' weigh 1."""
    only_zero = count_word_composed_of_letter(language, '0')
    only_one = count_word_composed_of_letter(language, '1')
    mixed = count_word_composed_of_mixed_letter(language)
    weighted = (only_zero * 2) + (only_one * 2) + (mixed * 1)
    return weighted / 5

def parity_calculation(language):
    """Return the parity (0 or 1) of the total number of '1' characters in
    all words of ``language``."""
    ones = sum(word.count('1') for word in language)
    return ones % 2


def special(word):
    """Concatenate the items of ``word``, read the result as a base-2 integer
    and divide it by ``len(word) * 240420020``.

    NOTE(review): the meaning of the constant 240420020 is not visible here.
    """
    bits = "".join(word)
    as_integer = int(bits, 2)
    divisor = len(word) * 240420020
    return as_integer / divisor

    
def get_language_data(lang):
    """Compute the feature dictionary of the language ``lang``.

    Every value is converted to ``str``. Keys are inserted in the order
    written below.
    NOTE(review): presumably this order has to match the column order of the
    training data — confirm against codes.csv.
    The ``percentage_of_*`` features (get_percent_of_elt) and the ``parity``
    feature (parity_calculation) are currently disabled.
    """
    return {
        'length': str(len(lang)),
        'average_word_length': str(get_average_word_length(lang)),
        'frequency_of_0': str(get_frequency_of_number(lang, '0')),
        'frequency_of_1': str(get_frequency_of_number(lang, '1')),
        'frequency_of_01': str(get_frequency_of_number(lang, '01')),
        'frequency_of_10': str(get_frequency_of_number(lang, '10')),
        'frequency_of_00': str(get_frequency_of_number(lang, '00')),
        'frequency_of_11': str(get_frequency_of_number(lang, '11')),
        'levenshtein': str(levenshtein_average_interval(lang)),
        'nc': str(calculate_nc(lang)),
        'entropy': str(entropy(lang)),
        'special': str(special(lang)),
    }

SERVER-SIDE powered by Flask

In [None]:
from flask import Flask,request, jsonify
from flask_cors import CORS, cross_origin

# Flask application object on which the routes below are registered.
app = Flask(__name__)

    
# Enable flask_cors on the whole application.
cors = CORS(app)
# flask_cors setting: value used for the allowed CORS request headers.
app.config['CORS_HEADERS'] = 'Content-Type'

@app.route('/', methods=['GET'])
@cross_origin()
def home():
    """Serve a minimal HTML fragment on the index route."""
    landing_markup = "<h1>Index</h1><p>"
    return landing_markup


@app.route('/is_code', methods = ['POST' , 'GET'])
@cross_origin()
def is_code():
    """Check a submitted language with Sardinas-Patterson and with the model.

    Reads the form field ``code``: a comma-separated list of words (spaces
    are stripped before splitting).

    Returns:
        JSON with the keys ``sp_result`` (algorithm verdict as a string),
        ``model_result`` (classifier prediction as a string) and ``code``
        (the parsed list of words). On any failure ``sp_result`` and
        ``model_result`` are empty strings and ``code`` carries the error
        message.
    """
    try:
        raw_code = request.form.get('code')
        if raw_code is None:
            # Without this check the client received the opaque message
            # "'NoneType' object has no attribute 'replace'".
            return jsonify({"sp_result": "", "model_result": "", "code": "Missing 'code' form field"})
        code = raw_code.replace(" ", "").split(",")
        sp = SardinasPatterson(code)
        # One-row frame holding the features of the submitted language.
        features = pd.DataFrame(get_language_data(code), index=[0])
        sp_result = sp.is_code()
        model_result = modele.predict(features)[0]
        return jsonify({"sp_result": str(sp_result[0]), "model_result": str(model_result), "code": code})
    except Exception as e:
        # Log the full traceback instead of printing only the message; the
        # response keeps the shape clients already receive.
        app.logger.exception("is_code request failed")
        return jsonify({"sp_result": "", "model_result": "", "code": str(e)})
    
# Start the Flask development server on port 5000 when this module is the
# entry point. app.run() blocks until the server is stopped.
if __name__ == '__main__':
    app.run(port=5000)