In [None]:
import pandas as pd
import numpy as np
import warnings
warnings.filterwarnings('ignore')
import re
import time
import collections

In [None]:
%%time
data=pd.read_excel("IT Incidents Dashboard FY22 Q4 V1.xlsx")


Only closed tickets are kept for the rest of the analysis, on the assumption that closed tickets carry correct values.

In [None]:
# Keep only the incidents whose state is 'Closed' and renumber the rows from 0.
is_closed = data['Incident state'] == 'Closed'
closed_ticket = data[is_closed].reset_index(drop=True)

In [None]:
# Columns carried through the rest of the notebook.
ANALYSIS_COLUMNS = [
    'Number',
    'Name',
    'Opened',
    'Assignment group',
    'Priority',
    'Urgency',
    'Configuration item',
    'Subcategory',
    'Category',
    'Resolved',
    'Business resolve time',
]
closed_ticket = closed_ticket[ANALYSIS_COLUMNS]

In [None]:
# Tickets without a 'Name' have nothing to analyse; discard them and renumber.
closed_ticket = closed_ticket.dropna(subset=['Name']).reset_index(drop=True)

## String validation for name column

In [None]:
# Value returned in place of a ticket name that carries no usable text.
_INSUFFICIENT_DATA_MESSAGE = 'Insufficient data exist for the ML engine to continue'

# A string that is nothing but an e-mail address (one trailing space tolerated).
# The separator class is written `[._-]`: with the hyphen in the middle
# (`[.-_]`) it becomes a range from '.' to '_' that also covers digits,
# upper-case letters and '@', which both over-matches and makes the pattern
# backtrack exponentially on long digit / upper-case runs.
_EMAIL_ONLY = re.compile(
    r'(?:[A-Za-z0-9]+[._-])*[A-Za-z0-9]+@[A-Za-z0-9-]+(?:\.[A-Za-z]{2,})+ ?'
)

# A string that is nothing but a URL (one trailing space tolerated).
_URL_ONLY = re.compile(r'(?:https?://|www\.)\S+ ?')

# Characters that commonly decorate phone / reference numbers: + - \ . ( ) /
_NUMBER_DECORATION = re.compile(r'[+\-\\.()/]')

# Whitespace sitting between two digits. Lookarounds keep the digits out of the
# match, so every gap in "1 2 3" is removed (a consuming `(\d)\s+(\d)` skips
# every second gap).
_SPACE_BETWEEN_DIGITS = re.compile(r'(?<=\d)\s+(?=\d)')


def string_is_invalid(text):
    """Validate a ticket name and return either a sentinel or the cleaned name.

    Despite its name this function does not return a boolean.

    Parameters
    ----------
    text : str
        Ticket name. Non-string values (for example numbers read from the
        spreadsheet) are converted with ``str()`` before validation.

    Returns
    -------
    str
        ``'Insufficient data exist for the ML engine to continue'`` when the
        name consists solely of an e-mail address, solely of a URL, or solely
        of digits (also after removing ``+ - \\ . ( ) /`` and the whitespace
        between digits). Otherwise the name with those characters and digit
        gaps removed.
    """
    if not isinstance(text, str):
        text = str(text)

    if _EMAIL_ONLY.fullmatch(text) or _URL_ONLY.fullmatch(text) or text.isdigit():
        return _INSUFFICIENT_DATA_MESSAGE

    # Strip number decoration so that "+1 (555) 123-4567" collapses to digits.
    stripped = _NUMBER_DECORATION.sub('', text)
    stripped = _SPACE_BETWEEN_DIGITS.sub('', stripped)

    if stripped.isdigit():
        return _INSUFFICIENT_DATA_MESSAGE
    return stripped

In [None]:
%%time
closed_ticket["name_string_validation"]=closed_ticket["Name"].apply(string_validation)

In [None]:
# The validator flags unusable names with this message (it never returns True),
# so that is the value to filter on.
INSUFFICIENT_DATA_MESSAGE = 'Insufficient data exist for the ML engine to continue'
has_usable_name = closed_ticket['name_string_validation'] != INSUFFICIENT_DATA_MESSAGE
closed_ticket = closed_ticket[has_usable_name].reset_index(drop=True)

## Language detection and translation to English

In [None]:
from langdetect import detect
from deep_translator import GoogleTranslator

In [None]:
def lang_detect(data):
    """Return the language code detected for ``data``.

    Parameters
    ----------
    data : str
        Text passed to ``langdetect.detect``.

    Returns
    -------
    str
        The code returned by ``detect``, or ``'Not identified'`` when detection
        raises (for example on text without detectable features).
    """
    try:
        return detect(data)
    except Exception:
        # `Exception` rather than a bare `except`, so Ctrl-C / kernel interrupt
        # still stops a long `.apply` run.
        return 'Not identified'

In [None]:
%%time
closed_ticket['langue'] = closed_ticket['Name'].apply(lang_detect)
closed_ticket = closed_ticket[closed_ticket['langue'] != 'Not identified']
closed_ticket.reset_index(drop=True,inplace=True)

In [None]:
def lang_translate(text,lang):
    """Translate ``text`` to English unless it is already English.

    Parameters
    ----------
    text : str
        Text to translate.
    lang : str
        Language code previously detected for ``text``; ``'en'`` skips the
        translation call entirely.

    Returns
    -------
    str
        ``text`` unchanged when ``lang == 'en'`` or when the translator raises;
        otherwise whatever ``GoogleTranslator(...).translate`` returns.
    """
    if lang == 'en':
        return text

    try:
        return GoogleTranslator(source='auto', target='en').translate(text)
    except Exception:
        # Best effort: keep the original text when translation fails.
        # `Exception` (not a bare `except`) so a kernel interrupt still
        # propagates.
        return text

In [None]:
%%time
closed_ticket['translated_text'] = closed_ticket.apply(lambda x: lang_translate(x['Name'], x['langue']), axis=1)

## Text pre-processing and identifying non-English characters

In [None]:
import nltk
import string
# English stop-word list used by the text cleaning step.
# NLTK raises LookupError when the corpus has not been downloaded yet, which
# would break a Restart & Run All on a fresh environment; fetch it once then.
try:
    stopwords = nltk.corpus.stopwords.words('english')
except LookupError:
    nltk.download('stopwords', quiet=True)
    stopwords = nltk.corpus.stopwords.words('english')

In [None]:
## to clean the text data

# Typographic symbols that `string.punctuation` does not cover. Written as a
# character class so each symbol is removed on its own (the former alternation
# ended in `\|\！`, which only matched the two-character sequence "|！").
_EXTRA_SYMBOLS = re.compile(r'[’【】“”–…´‐—•‘°€™\uf0e0®，！]')

# Translation table deleting every ASCII punctuation character.
_PUNCTUATION_TABLE = str.maketrans('', '', string.punctuation)

# Value returned when a non-English ticket still contains non-English characters.
_NON_ENGLISH_MESSAGE = 'Non-English characters were found in the text, preventing the ML engine from proceeding'


def text_preprocessing(text,language):
    """Clean a (translated) ticket name and flag text that is still non-English.

    Steps: remove typographic symbols and ASCII punctuation, lower-case, and
    drop the words found in the notebook-level ``stopwords`` list (splitting on
    any whitespace, so newlines become single spaces).

    Parameters
    ----------
    text : str
        Text to clean. ``None`` is treated as an empty string and any other
        non-string value is converted with ``str()``.
    language : str
        Language code detected for the original ticket name.

    Returns
    -------
    str
        The cleaned text when ``language == 'en'`` or when the cleaned text
        (spaces ignored) consists only of ASCII letters and digits; otherwise
        the message ``'Non-English characters were found in the text,
        preventing the ML engine from proceeding'``.

    Raises
    ------
    NameError
        If the notebook-level ``stopwords`` list has not been defined yet.
    """
    if text is None:
        text = ''
    elif not isinstance(text, str):
        text = str(text)

    text = _EXTRA_SYMBOLS.sub('', text)
    text = text.translate(_PUNCTUATION_TABLE)
    text = text.lower()
    text = ' '.join(word for word in text.split() if word not in stopwords)

    # `str.isalnum` alone is Unicode-aware and accepts e.g. CJK or accented
    # letters, so the ASCII check is what actually detects non-English text.
    compact = text.replace(' ', '')
    if language == 'en' or (compact.isascii() and compact.isalnum()):
        return text
    return _NON_ENGLISH_MESSAGE

In [None]:
%%time
closed_ticket['cleaned_name'] = closed_ticket.apply(lambda x: text_preprocessing(x['translated_text'],x['langue']), axis=1)

In [None]:
non_english_character=closed_ticket[closed_ticket['cleaned_name']=='Non english']
non_english_character.reset_index(drop=True,inplace=True)
non_english_character.shape

In [None]:
english_character=closed_ticket[closed_ticket['cleaned_name']!='Non english']
english_character.reset_index(drop=True,inplace=True)
english_character.shape

In [None]:
%%time
non_english_character['translated_text'] = non_english_character.apply(lambda x: lang_translate(x['translated_text'], x['langue']), axis=1)

In [None]:
%%time
non_english_character['cleaned_name'] = non_english_character.apply(lambda x: text_preprocessing(x['translated_text'],x['langue']), axis=1)

In [None]:
# Stack the accepted rows and the retried rows back into one frame with a
# fresh 0..n-1 index.
ticket_parts = [english_character, non_english_character]
closed_ticket = pd.concat(ticket_parts, ignore_index=True)

In [None]:
def checking_len_string(text):
    """Classify cleaned text as ``"Valid"`` or ``"Not Valid"`` by word count.

    Parameters
    ----------
    text : str
        Cleaned ticket text.

    Returns
    -------
    str
        ``"Valid"`` when ``text`` contains at least two whitespace-separated
        words, otherwise ``"Not Valid"``. Non-string values (for example an
        exception object or a missing value left in the column by an earlier
        step) are ``"Not Valid"`` instead of raising ``AttributeError``.
    """
    if not isinstance(text, str):
        return "Not Valid"

    # A string of at most one character can never hold two words, so the word
    # count alone decides.
    return "Valid" if len(text.split()) > 1 else "Not Valid"

In [None]:
%%time
closed_ticket['string_length'] = closed_ticket['cleaned_name'].apply(checking_len_string)

In [None]:
# Discard the tickets whose cleaned name was labelled "Not Valid".
has_enough_words = closed_ticket['string_length'] != "Not Valid"
closed_ticket = closed_ticket[has_enough_words]

## Prioritize tickets (keyword-based identification)

In [None]:
# Columns needed for re-prioritisation. `.copy()` makes this an independent
# frame, so the column assignments in the following cells do not write to a
# slice of `closed_ticket`.
# NOTE(review): this rebinds `data`, which held the raw workbook until here.
data = closed_ticket[['Number','translated_text','cleaned_name','Priority','Urgency']].copy()

In [None]:
# Force both text columns to plain Python strings before keyword matching.
data = data.astype({'cleaned_name': str, 'translated_text': str})

In [None]:
def new_priority(text,Priority):
    """Raise a ticket's priority by one level when its text contains a keyword.

    Relies on notebook-level names that must be defined before this is called:
    ``config`` (mapping with the keys ``'High'``, ``'Medium'`` and ``'Low'``),
    ``medium_keyword`` and ``low_keyword`` (regular expressions) and
    ``keyword_startwith`` (prefix, or tuple of prefixes, for ``str.startswith``).

    Parameters
    ----------
    text : str
        Ticket text to scan; non-string values are converted with ``str()``.
    Priority
        Current priority of the ticket, compared with the ``config`` values.

    Returns
    -------
    tuple or same type as ``Priority``
        ``(new_priority_value, message)`` when the ticket is escalated
        (medium to high, or low to medium); otherwise ``Priority`` unchanged.

    Raises
    ------
    NameError, KeyError
        If the notebook-level configuration described above is missing. These
        are raised rather than returned as the cell value, so a missing
        configuration is not silently stored in the result column.
    """
    if not isinstance(text, str):
        text = str(text)

    if Priority == config['Medium']:
        # `search` stops at the first hit; only the existence of a match matters.
        if re.search(medium_keyword, text) or text.startswith(keyword_startwith):
            return config['High'], 'Ticket is priortized from medium to high'
        return Priority

    if Priority == config['Low']:
        if re.search(low_keyword, text):
            # Return the message too, matching the medium-to-high branch.
            return config['Medium'], 'Ticket is priortized from low to medium'
        return Priority

    return Priority

In [None]:
%%time
data['new_priority'] = data.apply(new_priority,axis=1)