In [1]:
import pandas as pd
import numpy as np

import re
import time
import collections
import yaml
import os


import warnings
warnings.filterwarnings('ignore')

## Data reading 

In [2]:
%%time
# Load the raw incident export (path is relative to the working directory).
# The recorded timing output for this cell shows the read takes roughly 16 s.
data=pd.read_excel("IT Incidents Dashboard FY22 Q4 V1.xlsx")

CPU times: total: 15.7 s
Wall time: 15.7 s


In [3]:
# Keep only incidents whose state is 'Closed' and renumber the rows from 0.
closed_ticket = data.loc[data['Incident state'] == 'Closed'].reset_index(drop=True)

In [4]:
# Narrow the frame to the ticket attributes of interest.
closed_ticket = closed_ticket.loc[:, [
    'Number',
    'Name',
    'Opened',
    'Assignment group',
    'Priority',
    'Urgency',
    'Configuration item',
    'Subcategory',
    'Category',
    'Resolved',
    'Business resolve time',
]]

In [5]:
# Discard tickets that have no 'Name' value and renumber the rows from 0.
closed_ticket = closed_ticket.dropna(subset=['Name']).reset_index(drop=True)

In [6]:
# Use the ticket at row label 2 as the sample input for the pipeline below.
short_description = closed_ticket.loc[2, 'Name']
priority = closed_ticket.loc[2, 'Priority']

## Loading configuration

In [7]:
# Load config file.
# The encoding is given explicitly because open() otherwise falls back to the
# platform's locale encoding, which would decode non-ASCII keywords differently
# from one machine to another.
with open('config/config.yaml', encoding='utf-8') as file:
    config= yaml.safe_load(file)

In [8]:
# Current working directory of the notebook process
cwd_path = os.getcwd()

# Keyword settings read from the config and coerced to str.
# ticket_prioritization uses medium_keyword / low_keyword as regex patterns
# and keyword_startwith as a prefix for str.startswith.
keyword_startwith, medium_keyword, low_keyword = (
    str(config[setting])
    for setting in ('medium_keyword_startswith', 'medium_keyword', 'low_keyword')
)

## String Validation

In [9]:
def short_description_validation(text):
    '''
    Check whether a short description carries more than contact details.

    The description is rejected when, ignoring surrounding whitespace, it
    consists solely of an e-mail address, a website URL, or digits
    (optionally formatted like a phone number with spaces and any of
    the characters + - \\ . ( ) /).

    Parameters
    ----------
    text : str
        The short description to validate. Any non-string value (for
        example None or a NaN cell) is rejected as insufficient data.

    Returns
    -------
    'True'
        The bare string 'True' when the description is usable.
    ('False', message)
        A 2-tuple holding the string 'False' and the reason when it is not.
    '''
    insufficient_message = 'Insufficient data exist for the ML engine to continue'

    # Separator class is written as [._+-] so that '-' is a literal; the
    # previous [.-_] was a character *range* that excluded '-' and overlapped
    # the alphanumerics, which both missed addresses and risked heavy
    # backtracking.
    email_only = (
        r'(?:[A-Za-z0-9]+[._+-])*[A-Za-z0-9]+'
        r'@[A-Za-z0-9-]+(?:\.[A-Za-z0-9-]+)*\.[A-Za-z]{2,}'
    )

    # Either an explicit http/https scheme or a literal "www." prefix.
    url_only = r'(?:https?://|www\.)\S+'

    # Guard instead of try/except: re and str methods only fail here on
    # non-string input, so reject that explicitly with a normal result.
    if not isinstance(text, str):
        return 'False', insufficient_message

    candidate = text.strip()

    # The whole string is just an e-mail address or just a link.
    if re.fullmatch(email_only, candidate) or re.fullmatch(url_only, candidate):
        return 'False', insufficient_message

    # The whole string is just digits.
    if candidate.isdigit():
        return 'False', insufficient_message

    # Strip phone-style punctuation, then close every whitespace gap that sits
    # between two digits. Lookarounds are used so consecutive gaps such as
    # "1 2 3" are all removed (a consuming pattern skips every other gap).
    condensed = re.sub(r'[+\-\\.()/]', '', candidate)
    condensed = re.sub(r'(?<=\d)\s+(?=\d)', '', condensed)

    if condensed.isdigit():
        return 'False', 'Description containing only a phone number or digits'

    return 'True'

## Language Detection & translation

In [10]:
from langdetect import detect
from deep_translator import GoogleTranslator

In [11]:
def lang_detect(text):
    '''
    Detect the language of ``text`` and translate it to English if needed.

    English text is returned untouched. Anything else is sent through
    GoogleTranslator (source auto-detected, target 'en') and the language
    of the translated text is detected again.

    Parameters
    ----------
    text : str
        The description to analyse.

    Returns
    -------
    tuple
        ``(text, 'yes', language_code)`` on success, where ``text`` is the
        original or the translated string and ``language_code`` is what
        ``detect`` reported for it.
        ``(text, 'no', 'Features does not exist')`` when detection or
        translation raised; ``text`` is the translated string if the
        translation itself succeeded, otherwise the input.
    '''
    failure_message = 'Features does not exist'

    # `except Exception` (not a bare except) so that KeyboardInterrupt and
    # SystemExit still propagate while library errors are reported as 'no'.
    try:
        detected_language = detect(text)
    except Exception:
        return text, 'no', failure_message

    if detected_language == 'en':
        return text, 'yes', detected_language

    try:
        # `text` is only rebound once the translation has succeeded, so a
        # translation failure reports the original string back to the caller.
        text = GoogleTranslator(source='auto', target='en').translate(text)
        detected_language = detect(text)
    except Exception:
        return text, 'no', failure_message

    return text, 'yes', detected_language

## Text pre-processing & identifying non-English characters

In [12]:
import nltk
import string
# English stop-word list; text_preprocessing drops every token found in it.
# NOTE(review): presumably requires the NLTK 'stopwords' corpus to be installed locally — confirm.
stopwords = nltk.corpus.stopwords.words('english')

In [13]:
def text_preprocessing(text,language):
    '''
    Clean a description and flag text that still holds non-alphanumeric characters.

    Cleaning steps, in order: strip a fixed set of typographic symbols,
    strip ASCII punctuation, turn newlines into spaces, lower-case, and
    remove English stop words.

    Parameters
    ----------
    text : str
        The (already translated) description.
    language : str
        Language code reported for ``text``; 'en' makes the text acceptable
        even when non-alphanumeric characters remain after cleaning.

    Returns
    -------
    (text, 'yes')
        Cleaned text, when it is purely alphanumeric once spaces are
        removed, or when ``language`` is 'en'.
    ('no', message)
        When neither condition holds.
    Exception
        If any step raises, the exception object is returned (not raised),
        matching the error convention of the other helpers in this notebook.
    '''
    try:
        #removing the some special character and sign
        text=re.sub(r'\’|\【|\】|\“|\”|\–|\…|\´|\‐|\’|\—|\•|\‘|\°|\€|\™|\uf0e0|\®|\，|\|\！','', text)

        # remove ASCII punctuation
        text = text.translate(str.maketrans('', '', string.punctuation))

        # newline -> space, then lower-case
        text = text.replace("\n", " ").lower()

        # drop stop words; a set gives constant-time membership checks
        stopword_set = set(stopwords)
        text = ' '.join(word for word in text.split() if word not in stopword_set)

        # After the split/join above the only whitespace left is single
        # spaces, so removing them yields the characters to inspect.
        join_text = text.replace(' ', '')

        # NOTE(review): str.isalnum() is Unicode-aware, so letters from other
        # scripts also count as alphanumeric here — confirm this is intended.
        if join_text.isalnum() or language == 'en':
            # Both "clean" outcomes now share the (text, 'yes') shape; the
            # English fallback used to return the bare text, which callers
            # unpacking two values could not handle.
            return text, 'yes'

        message = 'Non-English characters were found in the text, preventing the ML engine from proceeding'
        return 'no', message

    except Exception as e:
        return e

## String length validation

In [14]:
def string_length_validation(text):
    '''
    Check that the text holds more than one word and more than one character.

    Returns ``(text, 'True')`` when it does, otherwise ``('False', message)``
    where ``message`` explains why the text was rejected.
    '''
    word_count = len(text.split())
    char_count = len(text)

    has_enough_content = word_count > 1 and char_count > 1
    if has_enough_content:
        return text, 'True'

    return 'False', 'Text is insufficiently character-rich for the ML engine to continue'

## Ticket prioritization

In [15]:
def ticket_prioritization(text,Priority):
    '''
    Raise a ticket's priority by one step when its text matches a keyword.

    A ticket whose priority equals config['Medium'] is raised to
    config['High'] if the medium_keyword pattern is found in the text or the
    text starts with keyword_startwith. A ticket whose priority equals
    config['Low'] is raised to config['Medium'] if the low_keyword pattern is
    found. Every other ticket keeps its priority.

    Returns
    -------
    (message, 'yes', new_priority)
        When the ticket was raised.
    (text, 'no', Priority)
        When it was left unchanged.
    Exception
        If anything raises, the exception object is returned (not raised).
    '''
    try:
        if Priority == config['Medium']:
            keyword_hit = re.search(medium_keyword, text)
            if keyword_hit or text.startswith(keyword_startwith):
                return 'Ticket is priortized from medium to high', 'yes', config['High']
            return text, 'no', Priority

        if Priority == config['Low']:
            if re.search(low_keyword, text):
                return 'Ticket is priortized from low to medium', 'yes', config['Medium']
            return text, 'no', Priority

        return text, 'no', Priority

    except Exception as error:
        return error

### Function Integration

In [16]:
def updating_ticket_with_comment(message):
    """Placeholder for attaching ``message`` to a ticket; does nothing and returns None."""
    return None
    

In [17]:
def droping_ticket_with_comment(text):
    """Return ``text`` unchanged; called with the comment explaining why a ticket is dropped."""
    return text

In [18]:
def _failure_reason(outcome):
    """Extract the message from a helper's failure result (a 2-tuple or an exception)."""
    if isinstance(outcome, tuple) and len(outcome) > 1:
        return outcome[1]
    return str(outcome)


def cleaned_short_description_with_priority(text,priority):
    """
    Run a ticket's short description through the full cleaning pipeline.

    Stages, in order: content validation, language detection/translation,
    text pre-processing, length validation, and keyword-based
    re-prioritisation. The first stage that rejects the text ends the
    pipeline and the ticket is dropped with that stage's message.

    Parameters
    ----------
    text : str
        The ticket's short description.
    priority
        The ticket's current priority, compared against the config values
        inside ticket_prioritization.

    Returns
    -------
    (text, priority)
        The cleaned text and the (possibly raised) priority on success.
    str
        Whatever droping_ticket_with_comment returns for the rejection
        message when a stage fails.

    Raises
    ------
    Exception
        Re-raises an exception object handed back by ticket_prioritization.
    """
    # Stage 1: the helper returns the bare string 'True' on success and a
    # ('False', message) tuple otherwise.
    validation = short_description_validation(text)
    if validation != 'True':
        return droping_ticket_with_comment(_failure_reason(validation))

    # Stage 2: on failure the third element carries the message. It is used
    # directly here; indexing the validation result (the string 'True' at
    # this point) would only yield a single character.
    text, is_language_detected, language_or_message = lang_detect(text)
    if is_language_detected != 'yes':
        return droping_ticket_with_comment(language_or_message)

    # Stage 3: accept (text, 'yes'); a bare string is also treated as cleaned
    # text because the helper has returned that shape for English input.
    # Anything else, such as ('no', message), is a rejection.
    preprocessed = text_preprocessing(text, language_or_message)
    if isinstance(preprocessed, str):
        text = preprocessed
    elif (
        isinstance(preprocessed, tuple)
        and len(preprocessed) == 2
        and preprocessed[1] == 'yes'
    ):
        text = preprocessed[0]
    else:
        return droping_ticket_with_comment(_failure_reason(preprocessed))

    # Stage 4: (text, 'True') on success, ('False', message) on failure, so
    # the second element is either the flag or the message to report.
    length_check = string_length_validation(text)
    if length_check[1] != 'True':
        return droping_ticket_with_comment(length_check[1])
    text = length_check[0]

    # Stage 5: surface a returned exception as itself rather than letting the
    # tuple unpacking below fail with an unrelated TypeError.
    prioritization = ticket_prioritization(text, priority)
    if isinstance(prioritization, Exception):
        raise prioritization
    message, priortize, priority = prioritization

    if priortize == 'yes':
        updating_ticket_with_comment(message)

    return text, priority
                        
                        

In [None]:
# Run the whole pipeline on the sample ticket selected earlier in the notebook.
cleaned_short_description_with_priority(short_description,priority)