In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import json
import os
from dotenv import load_dotenv
import pymysql.cursors
import datetime

In [4]:
# Load environment variables through python-dotenv; the trailing ';' suppresses the cell's output.
load_dotenv();

In [6]:
# Use matplotlib's dark theme for every figure drawn in this notebook.
plt.style.use('dark_background') 

def highlight_odd_rows(s):
    '''
    Builds the CSS declarations used to stripe a dataframe visualization
    Args:
       s : The row of the dataframe (only its length is used)
    Returns:
       ... (list): One CSS string per position of s; odd positions get an
                   indigo background, every position gets a white border
    '''
    bordered = 'border: 1px solid white;'
    striped = 'background-color: indigo; ' + bordered
    return [striped if position % 2 else bordered for position in range(len(s))]

In [38]:
class DataFrame_analysis:
    '''
    This class contains every method for regularization microservice function analysis
    '''
    #Class variables

    #The init method 
    def __init__(self):
        pass

    #Public
    def getDfSize(self, df):
        '''
        Returns the rows and columns of a dataframe
        Args:
           df (Dataframe) : Dataframe to analyze
        Returns:
          ... (dictionary): A dictionary with rows and columns as keys
        '''
        rows, columns = df.shape
        return {'rows': rows, 'columns': columns}

    #Public
    def determineDistinctValues(self, df, col):
        '''
        Determines the unrepeated values in a column
        Args:
           df (DataFrame) : Dataframe which contains the column to search
           col (string) : Column's name to search
        Returns:
           df[col].unique() (array): An array (iterable object) with the unrepeated values of the column
        '''
        return df[col].unique()

    #Public
    def determineDistinctKeys(self, columnToList): # Receives a list (any iterable of values), not a dataframe column name
        '''
        Groups the entries of a list by the set of keys of the JSON object they contain and counts every group
        Args:
           columnToList (list) : List of JSON object strings (already decoded dictionaries are accepted too)
        Returns:
           distinctKeysDict (dictionary): Maps the sorted keys joined with '/' to their appearance count.
                                          Entries that are not JSON objects are counted under 'notKey:<value>'
        Raises:
           json.JSONDecodeError: If an entry starts with '{' but is not valid JSON
        '''
        distinctKeysDict = {}
        for entry in columnToList:
            if isinstance(entry, dict):
                label = '/'.join(sorted(str(key) for key in entry))
            elif isinstance(entry, str) and entry.startswith('{'):
                label = '/'.join(sorted(json.loads(entry).keys()))
            else:
                # str() keeps empty strings and non-string values (None, NaN, numbers) from crashing the join
                label = ''.join(['notKey:', str(entry)])
            distinctKeysDict[label] = distinctKeysDict.get(label, 0) + 1
        return distinctKeysDict

    #Public
    def countAlertCodes(self, df, alertsColumn: str):
        '''
        Counts the alert codes in a column of a dataframe
        Args:
           df (DataFrame) : Dataframe which contains the column to search
           alertsColumn (string) : Column's name to search
        Returns:
           codeCount (dictionary): Keys 'Code 1' ... 'Code 7' with their appearance count, plus 'invalid',
                                   which counts the cells that are not a JSON list and the alerts whose
                                   code is not one of the seven known codes
        Raises:
           json.JSONDecodeError: If a cell starts with '[' but is not valid JSON
        '''
        codeCount = {'Code 1': 0 , 'Code 2': 0, 'Code 3': 0, 'Code 4': 0, 'Code 5': 0, 'Code 6': 0, 'Code 7': 0, 'invalid': 0}

        for alert in df[alertsColumn].values.tolist():
            if isinstance(alert, list):
                alertList = alert
            elif isinstance(alert, str) and alert.startswith('['):
                alertList = json.loads(alert)
            else:
                codeCount['invalid'] += 1
                continue
            for subalert in alertList:
                code = subalert.get('codigo') if isinstance(subalert, dict) else None
                label = ''.join(['Code ', str(code)])
                # Unknown codes are reported as invalid instead of raising KeyError
                if label in codeCount and label != 'invalid':
                    codeCount[label] += 1
                else:
                    codeCount['invalid'] += 1

        return codeCount

    #Public
    def calculateDeltaTime(self, df, columnA, columnB):
        '''
        Converts columnA and columnB in datetime type and makes the difference between both columns
        Args:
           df (DataFrame) : Dataframe which contains the columns to subtract
           columnA (string) : Minuend column
           columnB (string): Subtrahend column
        Returns:
           responsePeriod (DataFrame): A single column 'secondsDifference' with columnA - columnB in seconds
        '''
        responseTimeType = pd.to_datetime(df[columnA], format='%Y-%m-%d %H:%M:%S')
        requestTimeType = pd.to_datetime(df[columnB], format='%Y-%m-%d %H:%M:%S')
        secondsDifference = (responseTimeType - requestTimeType).dt.total_seconds()
        # Naming the column explicitly avoids depending on the name pandas gives to the difference
        return secondsDifference.to_frame(name='secondsDifference')

    #Private
    def _isEmptyAlert(self, alert):
        '''
        Tells whether an 'alertas' cell holds no alert
        Args:
           alert : The cell value, either the JSON text of a list or an already decoded list
        Returns:
           ... (bool): True for '[]' (ignoring surrounding spaces) or an empty list
        '''
        if isinstance(alert, str):
            return alert.strip() == '[]'
        return isinstance(alert, list) and not alert

    #Public
    def proveErrorAlert(self, df):
        '''
        Looks for records whose carfaxUsaData reports an error but that have no alert associated
        Args:
           df (DataFrame) : Dataframe with the columns 'carfaxUsaData' and 'alertas'
        Returns:
           ... (bool): True when at least one record has an 'error' key in carfaxUsaData and an empty
                       'alertas' value, False otherwise
        Raises:
           json.JSONDecodeError: If a carfaxUsaData cell is not valid JSON
        '''
        carfaxUsaData = df['carfaxUsaData'].values.tolist()
        alerts = df['alertas'].values.tolist()
        for carfaxEntry, alert in zip(carfaxUsaData, alerts):
            carfaxDict = json.loads(carfaxEntry)
            if isinstance(carfaxDict, dict) and 'error' in carfaxDict and self._isEmptyAlert(alert):
                return True
        return False

    #Private
    def getRepeatedValuesInAColumn(self, df, columnName):
        '''
        Identifies the repeated values of a column in a dataframe
        Args:
           df (DataFrame) : Dataframe which contains the columns to search
           columnName (string) : Column's name to search
        Returns:
           duplicates[columnName] (pandas Series): Every occurrence of the values that appear more than once in columnName
        '''
        duplicates = df[df.duplicated(subset=[columnName], keep=False)]
        return duplicates[columnName]

    #Private
    def searchValuesInADataframe(self, df, values:list, columnA: str, columnB: str)-> dict:
        '''
        Searches values in a dataframe columnA and retrieves the corresponding values of columnB.
        Args:
           df (DataFrame): The dataframe to search in.
           values (list): The list of values to be searched.
           columnA (str): The name of the column to search.
           columnB (str): The name of the column to retrieve values from.
        Returns:
          grouped: A dictionary with keys as the values from columnA and values as the list of corresponding entries from columnB.
        '''
        filtered_df = df[df[columnA].isin(values)]
        grouped = filtered_df.groupby(columnA)[columnB].apply(list).to_dict()
        return grouped

    #Public
    def getIncorrectRepeatedVinInformation(self, df)->dict:
        '''
        Classifies every repeated VIN as good (all its records have no alerts) or bad (all its records have alerts)
        Args:
           df (DataFrame): Dataframe with the columns 'VIN' and 'alertas' (alerts stored as text)
        Returns:
          ... (dict): 'message' is '' on success; 'goodVinsList' and 'badVinsList' hold [VIN, number of records]
                      pairs, one per VIN; 'goodVins' and 'badVins' are the number of VINs of each kind.
                      When a VIN has records with and without alerts, 'message' describes it and the
                      lists and counters are returned empty.
        '''
        vinValues = self.getRepeatedValuesInAColumn(df, 'VIN').unique().tolist()
        repeatedValues = self.searchValuesInADataframe(df, vinValues, 'VIN', 'alertas')
        goodVinsList = []
        badVinsList = []
        for repeatedVin, alerts in repeatedValues.items():
            differentValuesByVin = set(alerts)
            # Every VIN is classified exactly once, whatever the number of distinct alert values it has
            if '[]' in differentValuesByVin:
                if len(differentValuesByVin) > 1:
                    return {'message': ''.join(['Alerta con vin: ', str(repeatedVin), ' , tiene y no tiene alertas.']),
                            'goodVinsList': [],
                            'badVinsList': [],
                            'goodVins': 0,
                            'badVins': 0}
                goodVinsList.append([repeatedVin, len(alerts)])
            else:
                badVinsList.append([repeatedVin, len(alerts)])

        return {'message': '',
                'goodVinsList': goodVinsList,
                'badVinsList': badVinsList,
                'goodVins': len(goodVinsList),
                'badVins': len(badVinsList)}

    #Public
    def diagnoseVins(self, df):
        '''
        Converts the lists of  getIncorrectRepeatedVinInformation() function in dataframes
        Args:
           df (DataFrame): The dataframe where will be searched
        Returns:
          ... (dict): A dictionary which contains the good and bad VINs dataframes and the good and bad repeated Records of the VINs
        '''
        # The classification is computed once and reused for both frames
        vinsInformation = self.getIncorrectRepeatedVinInformation(df)
        goodVinsFrame = pd.DataFrame(vinsInformation['goodVinsList'], columns = ['VIN', 'repeatedTimes'])
        badVinsFrame = pd.DataFrame(vinsInformation['badVinsList'], columns = ['VIN', 'repeatedTimes'])
        goodRepeatedRecords = goodVinsFrame['repeatedTimes'].sum()
        badRepeatedRecords = badVinsFrame['repeatedTimes'].sum()

        return {'goodVinsFrame': goodVinsFrame, 'badVinsFrame': badVinsFrame, 'goodRepeatedRecords': goodRepeatedRecords, 'badRepeatedRecords': badRepeatedRecords}

    #Private
    def _summarizeResponse(self, responseJson):
        '''
        Reduces a decoded responseBody to the fields that are compared between records of the same VIN
        Args:
           responseJson (dict): The decoded responseBody
        Returns:
           info (dict): The vehicle fields (only when 'anioModelo' is present; 'fabricante' and 'paisOrigen'
                        only when both are present) and 'codes', the sorted distinct 'codigo' values of 'mensajes'
        '''
        info = {}
        if not isinstance(responseJson, dict):
            return info
        if 'anioModelo' in responseJson:
            fields = ['anioModelo', 'marca', 'modelo', 'robo', 'roboFecha']
            if 'fabricante' in responseJson and 'paisOrigen' in responseJson:
                fields += ['fabricante', 'paisOrigen']
            info = {field: responseJson.get(field) for field in fields}
            info['codes'] = []
        messages = responseJson.get('mensajes')
        if isinstance(messages, list) and messages:
            codes = []
            for message in messages:
                if isinstance(message, dict) and 'codigo' in message and message['codigo'] not in codes:
                    codes.append(message['codigo'])
            try:
                codes.sort()
            except TypeError:
                # Mixed types (e.g. int and str codes) cannot be compared directly
                codes.sort(key=str)
            if codes:
                info['codes'] = codes
        return info

    #Public
    def verifyInfoRepeatedVins(self, df):
        '''
        Verifies every repeated VIN has the same information in every record (due to the short period time reviewed)
        Args:
           df (DataFrame): Dataframe with the columns 'VIN' and 'responseBody' (JSON text)
        Returns:
          ... (dict): 'uniqueResponses' maps every repeated VIN to the list of its distinct summarized responses;
                      'differentResponses' maps the VINs with more than one distinct response to the last
                      differing response found
        Raises:
           json.JSONDecodeError: If a responseBody cell is not valid JSON
        '''
        uniqueResponses = {}
        differentResponses = {}
        vinValues = self.getRepeatedValuesInAColumn(df, 'VIN').unique().tolist()
        repeatedValues = self.searchValuesInADataframe(df, vinValues, 'VIN', 'responseBody')
        for vin, responseStrings in repeatedValues.items():
            uniqueResponses[vin] = []
            for responseString in responseStrings:
                info = self._summarizeResponse(json.loads(responseString))
                if info in uniqueResponses[vin]:
                    continue
                # The first response of a VIN is the reference; any later new one is a discrepancy
                if uniqueResponses[vin]:
                    differentResponses[vin] = info
                uniqueResponses[vin].append(info)
        return {'uniqueResponses': uniqueResponses, 'differentResponses': differentResponses}

In [28]:
def connectToDatabase():
    '''
    Opens a connection to the MySQL database described by environment variables
    Environment:
       USER_MYSQL, PASSWORD, HOST, PORT (defaults to 3306 when unset), DATABASE,
       CA-CERTIFICATE, CLIENT-CERTIFICATE, CLIENT-KEY
    Returns:
       conn: The open pymysql connection, or the pymysql.Error instance when the
             connection fails (the error is returned, not raised; the callers in
             this notebook rely on the AttributeError raised when they use it)
    '''
    # PyMySQL reads the keys "ca", "cert" and "key" from the ssl mapping; the
    # "ssl_ca"/"ssl_cert"/"ssl_key" spellings are only valid as top-level
    # connect() arguments and are ignored inside this dictionary.
    ssl_config = {"ca": os.getenv('CA-CERTIFICATE')}
    client_certificate = os.getenv('CLIENT-CERTIFICATE')
    if client_certificate:
        ssl_config["cert"] = client_certificate
        ssl_config["key"] = os.getenv('CLIENT-KEY')

    db_config = {
        "user": os.getenv('USER_MYSQL'),
        "password": os.getenv('PASSWORD'),
        "host": os.getenv('HOST'),
        # int(None) would raise TypeError when PORT is not configured
        "port": int(os.getenv('PORT', '3306')),
        "database": os.getenv('DATABASE'),
        "ssl": ssl_config
    }
    try:
        return pymysql.connect(**db_config)
    except pymysql.Error as err:
        return err

def connectHealth():
    '''
    Checks the database connection by listing its tables
    Returns:
       ... (string): "Connected to database: [...]" with the result of SHOW TABLES, or the
                     AttributeError instance raised when connectToDatabase() returned an
                     error object instead of a connection
    '''
    try:
        conn = connectToDatabase()
        cursor = conn.cursor()
        try:
            cursor.execute('SHOW TABLES;')
            result = cursor.fetchall()
        finally:
            # Release the cursor and the connection even when the query fails
            disconnectToDatabase(conn, cursor)
        return f"Connected to database: {list(result)}"
    except AttributeError as atr_err:
        return atr_err
    

def disconnectToDatabase(conn, cursor):
    '''
    Closes the cursor and then the connection
    Args:
       conn : Object with a close() method (the database connection)
       cursor : Object with a close() method (the cursor opened on conn)
    '''
    try:
        cursor.close()
    finally:
        # The connection must be released even if closing the cursor raises
        conn.close()

def getTheLastDate(view: str):
    '''
    Retrieves the most recent record of a view according to its responseTime column
    Args:
       view (string) : Name of the view or table to query. It is interpolated into the SQL
                       text as an identifier, so it must come from trusted configuration
    Returns:
       result (tuple): The record with the latest responseTime (None when the view is empty),
                       or the AttributeError instance raised when connectToDatabase()
                       returned an error object instead of a connection
    '''
    try:
        conn = connectToDatabase()
        cursor = conn.cursor()
        try:
            # LIMIT 1: only the first record is read, so the rest must not be transferred
            cursor.execute(f"SELECT * FROM {view} ORDER BY STR_TO_DATE(responseTime, '%Y-%m-%d %H:%i:%s') DESC LIMIT 1;")
            result = cursor.fetchone()
        finally:
            disconnectToDatabase(conn, cursor)
        return result
    except AttributeError as atr_err:
        return atr_err

def getTheDataframe(view: str, n_minutes:int, lastTime: str):
    '''
    Retrieves the records of a view whose responseTime falls in the n_minutes before lastTime
    Args:
       view (string) : Name of the view or table to query. It is interpolated into the SQL
                       text as an identifier, so it must come from trusted configuration
       n_minutes (int) : Length of the time window in minutes (a numeric string is accepted)
       lastTime (string) : End of the window, formatted as 'YYYY-MM-DD HH:MM:SS'
    Returns:
       result (tuple): The matching records ordered by responseTime descending, or the
                       AttributeError instance raised when connectToDatabase() returned
                       an error object instead of a connection
    '''
    dateFormat = '%Y-%m-%d %H:%i:%s'
    # The values travel as query parameters; only the identifier is formatted into the text
    query = (
        f"SELECT * FROM {view} "
        "WHERE STR_TO_DATE(responseTime, %s) "
        "BETWEEN DATE_SUB(STR_TO_DATE(%s, %s), INTERVAL %s MINUTE) AND STR_TO_DATE(%s, %s) "
        "ORDER BY STR_TO_DATE(responseTime, %s) DESC;"
    )
    parameters = (dateFormat, lastTime, dateFormat, int(n_minutes), lastTime, dateFormat, dateFormat)
    try:
        conn = connectToDatabase()
        cursor = conn.cursor()
        try:
            cursor.execute(query, parameters)
            result = cursor.fetchall()
        finally:
            disconnectToDatabase(conn, cursor)
        return result
    except AttributeError as atr_err:
        return atr_err

In [42]:
# Name of the view to analyze and length (in minutes) of every analysis window.
view = os.getenv('VIEW_NAME')
# Environment values are strings; the window length is used as a number
# (pd.Timedelta(minutes=...)), so it is converted here. It stays None when unset.
_difference_minutes = os.getenv('DIFFERENCE_MINUTES')
n_minutes = int(_difference_minutes) if _difference_minutes is not None else None

In [18]:
# Sanity check: displays the tables visible through the configured connection.
connectHealth()

{'user': 'root', 'password': 'example', 'host': 'localhost', 'port': 3306, 'database': 'regularization', 'ssl': {'ssl_ca': '/certificates/ca-crt.pem', 'ssl_cert': '/certificates/client-crt-pem', 'ssl_key': '/certificates/client-key.pem'}}


"Connected to database: [('new_view',), ('regularization_table',)]"

In [20]:
#lastTime = datetime.datetime.now()
# Element 12 of the latest record is used as the end of the analysis window.
# NOTE(review): position 12 corresponds to 'responseTime' in the column list used to
# build regularizationFrame — confirm it matches the column order of the view.
lastTime = list(getTheLastDate(view))[12]
lastTime

{'user': 'root', 'password': 'example', 'host': 'localhost', 'port': 3306, 'database': 'regularization', 'ssl': {'ssl_ca': '/certificates/ca-crt.pem', 'ssl_cert': '/certificates/client-crt-pem', 'ssl_key': '/certificates/client-key.pem'}}


'2024-11-21 11:02:06'

In [22]:
# Displays the raw records of the window that ends at lastTime.
# NOTE(review): this dumps every returned row into the cell output — consider showing only a slice.
getTheDataframe(view, n_minutes, lastTime)

{'user': 'root', 'password': 'example', 'host': 'localhost', 'port': 3306, 'database': 'regularization', 'ssl': {'ssl_ca': '/certificates/ca-crt.pem', 'ssl_cert': '/certificates/client-crt-pem', 'ssl_key': '/certificates/client-key.pem'}}


(('1GCWGGBA0D1111037',
  '673f678e12327586f2ae7414',
  '10.240.2.38:80',
  '201.116.128.213:8772,10.240.8.27, 127.0.0.6',
  '6cNqbbTqBnZEchPZ2YYn',
  '',
  '',
  '{}',
  '/v2/regularizacion/1GCWGGBA0D1111037',
  '200',
  'ok',
  '2024-11-21 11:02:06',
  '2024-11-21 11:02:06',
  '',
  '673f678e12327586f2ae7415',
  '673f678e12327586f2ae7414',
  '{"robo": false, "marca": "CHEVROLET", "modelo": "EXPRESS 2500", "mensajes": [], "roboFecha": null, "anioModelo": "2013", "fabricante": "GENERAL MOTORS LLC", "paisOrigen": "ESTADOS UNIDOS (USA)", "tiempoConsulta": "2024-11-21T11:02:06-06:00", "tiempoRespuesta": "2024-11-21T11:02:06-06:00"}',
  '{"vin": "1GCWGGBA0D1111037", "timestamp": "2024-11-21T11:01:44.000", "components": {"potentialFraudFile": {"make": "CHEVROLET", "year": 2013, "model": "EXPRESS 2500", "vinCloningFlags": [], "rateEvasionFlags": [], "potentialFraudFlags": []}}, "customCalculationOdometer": ""}',
  '[]',
  '1'),
 ('1GCWGGBAOD1111037',
  '673f678a74141818244e5119',
  '10.240.2.

In [44]:
# Build the working dataframe from the queried records. The window length is the
# n_minutes value configured above (the previous name, nMinutes, was never defined).
regularizationFrame = pd.DataFrame(list(getTheDataframe(view, n_minutes, lastTime)), columns = ['VIN', '_id', 'host', 'ip', 'apiKey', 'userId', 'idReporte',
       'requestParameters', 'requestUrl', 'responseCode', 'responseCodeStatus',
       'requestTime', 'responseTime', 'labels', 'idRespuesta', 'idConsulta',
       'responseBody', 'carfaxUsaData', 'alertas', 'firewallUsa'])
# 'xD' is the sentinel used by the later checks to recognize missing values
regularizationFrame = regularizationFrame.fillna('xD')

In [62]:
# Analyzer instance shared by the following cells.
filteredAnalysis = DataFrame_analysis()

In [58]:
# Low-level alert on IPs: number of distinct VINs requested by every IP.
ips_vins_frame = (
    regularizationFrame.groupby('ip')['VIN']
    .nunique()  
    .reset_index()  
    .rename(columns={'VIN': 'count'})  
)
# An empty frame has no maximum (NaN), which int() cannot convert
ipsMax = int(ips_vins_frame['count'].max()) if not ips_vins_frame.empty else 0
if ipsMax > 20:
    # Report every IP above the threshold, not only the ones tied at the maximum
    suspiciousIps = list(ips_vins_frame[ips_vins_frame['count'] > 20]['ip'])
    print('Alerta de ips que checan más de 20 VINs distintos' + str(suspiciousIps))

4


In [80]:
# Alert on HTTP error codes. Medium-high priority.
regularizationFrame['responseCode'] = regularizationFrame['responseCode'].astype(int)

# Only codes from 500 upwards are considered errors here.
bad_codes = regularizationFrame.loc[regularizationFrame['responseCode'] >= 500]
bad_code_counts = bad_codes['responseCode'].value_counts()

for code, count in bad_code_counts.items():
    # A single occurrence raises no alert.
    if count < 2:
        continue
    if count > 2:
        print(f'Alerta alta de errores HTTP con código {code}')
    else:
        print(f'Alerta media de errores HTTP con código {code}')

96
Alerta alta de errores HTTP con código 200


In [90]:
# Alert on requests made with an API key different from the configured one.
apiKey = os.getenv('API_KEY_REGULARIZATION')

bad_apiKeys = regularizationFrame[regularizationFrame['apiKey'] != apiKey]

bad_apiKeys_counts = bad_apiKeys['apiKey'].value_counts()

# The loop variable has its own name so the configured apiKey is not overwritten.
for unexpectedApiKey, count in bad_apiKeys_counts.items():
    if count == 2:
        print(f'Alerta media para la api Key {unexpectedApiKey}')
    elif count > 2:
        print(f'Alerta alta para la apiKey {unexpectedApiKey}')

Alerta alta para la apiKey 6cNqbbTqBnZEchPZ2YYn


In [64]:
# Shape check: groups the response bodies by the set of keys they contain and counts each group.
responseBody = regularizationFrame['responseBody'].values.tolist()
filteredAnalysis.determineDistinctKeys(responseBody)

{'anioModelo/fabricante/marca/mensajes/modelo/paisOrigen/robo/roboFecha/tiempoConsulta/tiempoRespuesta': 96}

In [34]:
filteredAnalysis = DataFrame_analysis()
maxTimeAllowed = 6
totalRows = filteredAnalysis.getDfSize(regularizationFrame)['rows']
partialRows = 1
recordsAnalysis = []
codesAnalysis = []
vinsAnalysis = []
regularizationFrame['responseTime'] = pd.to_datetime(regularizationFrame['responseTime'], format='%Y-%m-%d %H:%M:%S')
dataframeIndex=0


while partialRows < totalRows:
    #DATA SEPARATION BY TIME LAPSE n_minutes
    first_timestamp = regularizationFrame['responseTime'].iloc[partialRows-1]
    print('first', first_timestamp)
    time_threshold = first_timestamp - pd.Timedelta(minutes=n_minutes)
    filtered_df = regularizationFrame[(regularizationFrame['responseTime'] > time_threshold) & (regularizationFrame['responseTime'] <= first_timestamp)].fillna('xD')
    print('first', filtered_df['responseTime'].values[0:1], filtered_df['idRespuesta'].values[0:1])
    print('last',filtered_df['responseTime'].values[-1:], filtered_df['idRespuesta'].values[-1:])
    timeLapse = '- '.join([str(first_timestamp),str(time_threshold)])
    
    ##SHAPE DATA ANALYSIS
    if list(filteredAnalysis.determineDistinctKeys(filtered_df['requestParameters']).keys()) !=['']:
        print('Existen peticiones con RequestParameters inconvencionales')
    if list(filteredAnalysis.determineDistinctValues(filtered_df, 'apiKey'))!=['6cNqbbTqBnZEchPZ2YYn']:
        print('Existen peticiones con distinta ApiKey')  
    if list(filteredAnalysis.determineDistinctValues(filtered_df, 'userId'))!=['xD']:
        print('Existen peticiones con distinto userId')  
    if list(filteredAnalysis.determineDistinctValues(filtered_df, 'responseCode'))!=[200] and filteredAnalysis.determineDistinctValues(filtered_df, 'responseCodeStatus').values.list()!=['ok']:
        print('Existen peticiones con distinto responseCode')
    if list(filteredAnalysis.determineDistinctValues(filtered_df, 'labels'))!=['xD']:
        print('Existen peticiones con distinto labels')
    if list(filteredAnalysis.determineDistinctValues(filtered_df, 'firewallUsa'))!=[0]:
        print('Existen peticiones con distinto firewallUsa')
    totalRecords = filteredAnalysis.getDfSize(filtered_df)['rows']
    
    ##CONTENT DATA ANALYSIS
    
    #Alerts Analysis
    codeCount = filteredAnalysis.countAlertCodes(filtered_df, 'alertas')
    codesAnalysis.append([timeLapse])
    for code in codeCount.keys():
        codesAnalysis[dataframeIndex].append(codeCount[code])

    #Response Time Analysis
    responseTime = filteredAnalysis.calculateDeltaTime(filtered_df, 'responseTime', 'requestTime')
    #plt.hist(responseTime['secondsDifference'], bins = 20, edgecolor = 'black')
    #plt.xlabel('Time in Seconds')
    #plt.ylabel('Frequency')
    #plt.title(f'Histogram of Delay in Seconds {''.join([timeLapse.split(' ')[1],timeLapse.split(' ')[3]])}')
    #plt.show()

    maxTimeResponse = responseTime.max().values.tolist()[0]
    if maxTimeResponse > maxTimeAllowed:
        print('Existen peticiones que están tardando en responder más de lo usual: ', str(maxTimeResponse))

    #Congruence CarfaxUsaData-Alerts
    if filteredAnalysis.proveErrorAlert(filtered_df):
        print('Existen registros con error y sin alerta')

    #Good-Bad VINs Relation
    vinsRelations = filteredAnalysis.getIncorrectRepeatedVinInformation(filtered_df)
    if vinsRelations['message']!='':
        print( vinsRelations['message'])
    else:
        goodVins=vinsRelations['goodVins']
        badVins=vinsRelations['badVins']
        vinsAnalysis.append([timeLapse, badVins+goodVins, goodVins, badVins])
        
    #Good-Bad Records Relations
    framesSources = filteredAnalysis.diagnoseVins(filtered_df)
    goodRecords = filteredAnalysis.diagnoseVins(filtered_df)['goodRepeatedRecords']
    badRecords = filteredAnalysis.diagnoseVins(filtered_df)['badRepeatedRecords']
    radio = badRecords/totalRecords
    recordsAnalysis.append([timeLapse, totalRecords, goodRecords, badRecords, radio])

    #Review congruent information Repeated Vins
    if filteredAnalysis.verifyInfoRepeatedVins(filtered_df)['differentResponses'] != {}:
         print('Existen respuestas distintas para un mismo VIN: ', verifyInfoRepeatedVins(df)['differentResponses'])
     
    partialRows += totalRecords  
    print(partialRows)
    dataframeIndex+=1

%time

first 2024-11-21 11:02:06
first ['2024-11-21T11:02:06.000000000'] ['673f678e12327586f2ae7415']
last ['2024-11-21T10:47:07.000000000'] ['673f640b12327586f2ae73d5']


TypeError: DataFrame_analysis.determineDistinctKeys() takes 1 positional argument but 2 were given

In [None]:
# Turn the per-window results collected by the analysis loop into dataframes.
vinsFrame = pd.DataFrame(vinsAnalysis, columns=['timeLapse', 'total', 'right', 'wrong'])
recordsFrame = pd.DataFrame(recordsAnalysis, columns = ['timeLapse', 'total', 'right', 'wrong', 'radio'])
# Every codesAnalysis row holds the time lapse, the seven code counters and the
# 'invalid' counter, so nine column names are required
codesFrame = pd.DataFrame(codesAnalysis, columns = ['timeLapse', 'code1', 'code2', 'code3', 'code4', 'code5', 'code6', 'code7', 'invalid'])

In [None]:
# Repeated VINs per time lapse: total, right and wrong series on one chart.
_vinSeries = [
    ('total', 'w', 'Total Repeated Vins vs Time Lapse'),
    ('right', 'c', 'Right Repeated Vins vs Time Lapse'),
    ('wrong', 'y', 'Wrong Repeated Vins vs Time Lapse'),
]
for _column, _color, _label in _vinSeries:
    plt.plot(vinsFrame['timeLapse'], vinsFrame[_column], marker='o', linestyle='-', color=_color, label = _label)
plt.xlabel('Time Lapse')
plt.ylabel('Repeated VINs')
plt.title('Graph of Repeated VINs')
plt.legend()
plt.show()

In [None]:
# Repeated records per time lapse: total, right and wrong series on one chart.
_recordSeries = [
    ('total', 'w', 'Total Repeated Records vs Time Lapse'),
    ('right', 'c', 'Right Repeated Records vs Time Lapse'),
    ('wrong', 'y', 'Wrong Repeated Records vs Time Lapse'),
]
for _column, _color, _label in _recordSeries:
    plt.plot(recordsFrame['timeLapse'], recordsFrame[_column], marker='o', linestyle='-', color=_color, label = _label)
plt.xlabel('Time Lapse')
plt.ylabel('Repeated Records')
plt.title('Graph of Repeated Records')
plt.legend()
plt.show()

In [None]:
# Proportion of bad records ('radio' = bad records / total records) for every time lapse.
plt.plot(recordsFrame['timeLapse'], recordsFrame['radio'], marker='o', linestyle='-', color='w', label = 'Alert Radio')
plt.xlabel('Time Lapse')
plt.ylabel('Proportion')
plt.title('Graph of Bad Records Proportion')
plt.legend()
plt.show()

In [None]:
# Appearances of alert codes 7, 6, 4 and 5 per time lapse (drawn in that order).
_codeSeries = [
    ('code7', 'w', 'Code 7 Appearences vs Time Lapse'),
    ('code6', 'c', 'Code 6 Appearences vs Time Lapse'),
    ('code4', 'y', 'Code 4 Appearences vs Time Lapse'),
    ('code5', 'r', 'Code 5 Appearences vs Time Lapse'),
]
for _column, _color, _label in _codeSeries:
    plt.plot(codesFrame['timeLapse'], codesFrame[_column], marker='o', linestyle='-', color=_color, label = _label)
plt.xlabel('Time Lapse')
plt.ylabel('Repeated Records')
plt.title('Graph of Alert Code Appearences')
plt.legend()
plt.show()

In [None]:
# Column totals of the alert-code counters over all windows.
# NOTE(review): 'timeLapse' holds text, so its "sum" is not a meaningful total.
codesFrame.sum()

In [None]:
# Column totals of the record counters over all windows.
# NOTE(review): 'timeLapse' holds text, so its "sum" is not a meaningful total.
recordsFrame.sum()

In [None]:
# Descriptive statistics of the bad-record proportion per window.
recordsFrame[['timeLapse', 'radio']].describe()

In [None]:
# Column totals of the repeated-VIN counters over all windows.
# NOTE(review): 'timeLapse' holds text, so its "sum" is not a meaningful total.
vinsFrame.sum()

## 