In [None]:
# Install or upgrade the packages this notebook needs: Google's Gen AI SDK (google-genai),
# the Groq client (groq), and the LangChain integration for Groq (langchain-groq).

%pip install -U -q 'google-genai'
%pip install -U -q 'groq'
%pip install -U -q 'langchain-groq'

In [None]:
# Takes a CSV file, reads its data, transforms it into JSON format, and saves the JSON data to a new file.
# It uses the pandas library for data handling and the json library for JSON conversion

import pandas as pd
import json

# Parse the ticket CSV into a DataFrame.
df = pd.read_csv('/content/testing_data_set.csv')

# Turn every row into its own dict keyed by column name.
json_list = df.to_dict(orient='records')

# Serialise the row dicts as pretty-printed JSON text.
json_output = json.dumps(json_list, indent=4)

# Persist the JSON text so later cells can reload the tickets from disk.
with open('testing_data_set.json', mode='w') as json_out:
    json_out.write(json_output)

print(
    "The CSV file has been successfully converted to JSON format and saved to testing_data_set.json."
)

In [None]:
#Provides utilities for Google Colab environments (likely where this code is running). It is used to access user data
from google.colab import userdata

# Used to interact with the Groq platform
from groq import Groq

#Enables integration between the LangChain framework and the Groq platform. LangChain is a framework for developing applications powered by language models.
from langchain_groq import ChatGroq
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import JsonOutputParser

import time

# Accumulates the classification results, keyed by ticket ID.
final_dict = dict()

# Reload the ticket records from the JSON file into test_json.
with open('/content/testing_data_set.json') as tickets_fh:
    test_json = json.loads(tickets_fh.read())

# Initialize the Groq language model; the API key is fetched through Colab's userdata.
llm = ChatGroq(
    api_key=userdata.get('GROQ_API_KEY'),
    temperature=0.3,
    model_name="deepseek-r1-distill-llama-70b",
)

# Expected reply shape: a JSON object with four string-valued fields.
parser = JsonOutputParser(pydantic_object={
    "type": "object",
    "properties": {
        field_name: {"type": "string"}
        for field_name in ("department", "priority", "language", "type")
    },
})

# Build the two-message chat prompt used for the first classification pass.
# The system message lists the allowed values for department, priority, language and type,
# followed by five worked examples (tickets 1001-1005); the user message carries the ticket.
# Doubled braces ({{ }}) are literal braces in the template, so {input} is the only
# placeholder that gets filled in at invoke time.
prompt = ChatPromptTemplate.from_messages([
    ("system", """Classify IT support tickets into JSON with this structure:
        {{
            "department": "Technical Support, Customer Service, Billing and Payments, Product Support, IT Support, Returns and Exchanges, Sales and Pre-Sales, Human Resources, Service Outages and Maintenance, General Inquiry",
            "priority": "low, medium, high",
            "language": "en, es, fr, de, etc. (full list at https://docs.aws.amazon.com/translate/latest/dg/what-is-languages.html#what-is-languages-supported)",
            "type": "Incident, Request, Problem, Change"
        }}

        Examples:
        [
            {{
                "ticket_ID": 1001,
                "ticket_subject": "Discrepancia de facturación en Google Workspace",
                "ticket_body": "Monto de facturación incorrecto.",
                "department": "Billing and Payments",
                "type": "Incident",
                "priority": "low",
                "language": "es"
            }},
            {{
                "ticket_ID": 1002,
                "ticket_subject": "Urgent Consultation Request for Critical IT Issues",
                "ticket_body": "Critical server issues.",
                "department": "Customer Service",
                "type": "Request",
                "priority": "high",
                "language": "en"
            }},
            {{
                "ticket_ID": 1003,
                "ticket_subject": "Consulta sobre Servicios de Consultoría en TI",
                "ticket_body": "Info on IT consulting.",
                "department": "General Inquiry",
                "type": "Request",
                "priority": "medium",
                "language": "es"
            }},
            {{
                "ticket_ID": 1004,
                "ticket_subject": "Demande de mise à jour des dossiers",
                "ticket_body": "Update employee records.",
                "department": "Human Resources",
                "type": "Change",
                "priority": "low",
                "language": "fr"
            }},
            {{
                "ticket_ID": 1005,
                "ticket_subject": "Issues with Slack connection affecting team communication today",
                "ticket_body": "Problems with Slack.",
                "department": "Product Support",
                "type": "Problem",
                "priority": "medium",
                "language": "en"
            }}
        ]
    """),
    ("user", "{input}")
])

# Assemble the LangChain pipeline that processes one ticket by piping three stages together:
#   1. `prompt` fills the template with the ticket passed under the "input" key.
#   2. `llm` (the Groq chat model) produces the classification reply.
#   3. `parser` parses the reply text as JSON.
# NOTE(review): the parser can only parse what the model returns; it presumably raises
# when the reply is not valid JSON (e.g. extra reasoning text around it) - confirm and
# decide whether the calling loop should handle that.
chain = prompt | llm | parser


def classify_ticket(ticket: dict) -> dict:
    """Classify one ticket with the Groq chain and record the result.

    The ticket is serialised to JSON and sent through the module-level
    ``chain``. The parsed reply is printed, and its ``department``, ``type``,
    ``priority`` and ``language`` values are stored (in that order) in the
    module-level ``final_dict`` under the ticket's ``ticket_ID``.

    Args:
        ticket: Ticket record; must contain a ``ticket_ID`` key.

    Returns:
        The parsed classification returned by the chain.

    Raises:
        ValueError: If the chain's output is not a JSON object, so the
            expected fields cannot be read from it.
    """
    result = chain.invoke({"input": json.dumps(ticket)})
    print(json.dumps(result, indent=2))

    # Valid JSON is not necessarily an object (it could be a list or a string);
    # fail with a message that names the ticket instead of an AttributeError on .get().
    if not isinstance(result, dict):
        raise ValueError(
            f"Expected a JSON object for ticket {ticket['ticket_ID']!r}, "
            f"got {type(result).__name__}"
        )

    # Result from the LLM is stored in final_dict with the ticket ID as key.
    final_dict[ticket['ticket_ID']] = [
        result.get(field) for field in ("department", "type", "priority", "language")
    ]
    return result

# Classify at most the first 500 tickets. Slicing (rather than indexing 0..499)
# avoids an IndexError when the data set holds fewer than 500 records.
for ticket in test_json[:500]:
    print(ticket)
    classify_ticket(ticket)
    # Pause for 15 seconds before the next ticket to stay within API rate limits.
    time.sleep(15)

In [None]:
# Write the initial classifications collected in final_dict to submission.csv.
import csv

# Column order of the output file.
header = ['ticket_ID', 'department', 'type', 'priority', 'language']

# Emit the header followed by one row per classified ticket.
with open('submission.csv', 'w', newline='') as out_file:
    csv_writer = csv.writer(out_file)
    csv_writer.writerows(
        [header] + [[ticket_id] + values for ticket_id, values in final_dict.items()]
    )

print("Data has been written to submission.csv")

In [None]:
# Google gen AI LLm is used to predict type and priority

from google.colab import userdata

# The Google Generative AI library used for interacting with their language models.
from google import genai
from google.genai import types

#A library for data validation and parsing using Python type hints
from pydantic import BaseModel

import json
import time

# Fetch the Google API key through Colab's userdata helper instead of hard-coding it.
GOOGLE_API_KEY = userdata.get('GOOGLE_API_KEY')


# Instruction text given to the Google GenAI model to guide how it classifies tickets.
# It limits the task to the two attributes this pass predicts: priority and type.
system_instructions = """
You are an AI model designed to classify IT support tickets.
Classify each ticket based on the following attributes:
- Priority: low, medium, high
- Type: Incident, Request, Problem, Change
"""

# Expected structure of the model's classification output for one ticket.
# It is passed to the GenAI call as `response_schema`, so the reply is parsed into this model.
# Only `ticket_type` and `priority` are read afterwards; the other fields are not used.
# NOTE(review): the prompt built in classify_ticket does not include the ticket ID, so the
# `ticket_ID` value here is whatever the model produces - confirm it is not relied upon.
class TicketClassification(BaseModel):
    ticket_ID: str  # ticket identifier as reported by the model
    ticket_body: str  # ticket body text as reported by the model
    ticket_subject: str  # ticket subject as reported by the model
    priority: str  # instructions ask for one of: low, medium, high
    ticket_type: str  # instructions ask for one of: Incident, Request, Problem, Change

# Type/priority predictions from the GenAI pass, keyed by ticket ID.
final_genai_dict = dict()

# Create the GenAI client with the API key obtained earlier.
client = genai.Client(api_key=GOOGLE_API_KEY)

# Reload the ticket records for this pass.
with open('testing_data_set.json') as tickets_fh:
    test_json = json.loads(tickets_fh.read())

# Function to classify a ticket's type and priority with the GenAI model
def classify_ticket(ticket: dict) -> dict:
    """Classify one ticket's type and priority with the GenAI client.

    Builds a prompt from ``system_instructions`` plus the ticket's subject and
    body, requests a JSON reply shaped like ``TicketClassification``, and
    stores ``[ticket_type, priority]`` in the module-level
    ``final_genai_dict`` under the ticket's ``ticket_ID``.

    Args:
        ticket: Ticket record with ``ticket_ID``, ``ticket_subject`` and
            ``ticket_body`` keys.

    Returns:
        A dict with the predicted ``ticket_type`` and ``priority``.

    Raises:
        ValueError: If the response carries no parsed classification.
    """
    request_text = f"{system_instructions}\n\nTicket:\nSubject: {ticket['ticket_subject']}\nBody: {ticket['ticket_body']}"

    response = client.models.generate_content(
        model='gemini-2.0-flash',
        contents=request_text,
        config={
            'response_mime_type': 'application/json',
            'response_schema': TicketClassification,
        },
    )
    result = response.parsed

    # `parsed` can be empty when the reply does not match the schema; report which
    # ticket failed instead of crashing with an AttributeError on None.
    if result is None:
        raise ValueError(
            f"No parseable classification returned for ticket {ticket['ticket_ID']!r}"
        )

    final_genai_dict[ticket['ticket_ID']] = [result.ticket_type, result.priority]
    return {"ticket_type": result.ticket_type, "priority": result.priority}

# Classify at most the first 500 tickets from testing_data_set.json. Slicing avoids an
# IndexError when the data set holds fewer than 500 records.
for i, ticket in enumerate(test_json[:500]):
    print(f"Classifying ticket {i}")
    classify_ticket(ticket)
    # Pause 3 seconds before the next ticket: the quota is 15 requests per minute,
    # assuming roughly 1 second per classification call.
    time.sleep(3)

In [None]:
import pandas as pd

# Load the submission file written by the first classification pass.
df = pd.read_csv('/content/submission.csv')

# Override the 'type' and 'priority' columns with the values from final_genai_dict.
# The ID column in submission.csv is named 'ticket_ID' (that is the header it was
# written with); looking up 'ticketid' raised a KeyError.
# A ticket ID missing from final_genai_dict still raises KeyError here, which flags
# tickets the GenAI pass did not classify.
df['type'] = df['ticket_ID'].map(lambda x: final_genai_dict[x][0])
df['priority'] = df['ticket_ID'].map(lambda x: final_genai_dict[x][1])

# Save the updated DataFrame to a new CSV file, leaving submission.csv untouched.
df.to_csv('submission_updated.csv', index=False)

print("The 'type' and 'priority' columns have been successfully updated and saved to submission_updated.csv.")

In [None]:
# More accurate information is given to deepseek with more input tokens

from langchain_groq import ChatGroq
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import JsonOutputParser
import json

# Department predictions from this pass, keyed by ticket ID.
final_depertment_dict = dict()

# Reload the ticket records for this pass.
with open('testing_data_set.json') as tickets_fh:
    test_json = json.loads(tickets_fh.read())

# Set up the Groq chat model.
llm = ChatGroq(
    api_key=userdata.get('GROQ_API_KEY'),
    temperature=0.3,
    model_name="deepseek-r1-distill-llama-70b",
)

# Expected reply shape: a JSON object with a single string field, "department".
parser = JsonOutputParser(pydantic_object=dict(
    type="object",
    properties=dict(department=dict(type="string")),
))

# Build the chat prompt for the department-only pass.
# The system message lists the allowed department values and nine worked examples
# (tickets 1001-1009) with fuller ticket bodies; the user message carries the ticket.
# Doubled braces ({{ }}) are literal braces in the template, so {input} is the only
# placeholder that gets filled in at invoke time.
# NOTE(review): example 1005's subject is about Slack while its body is about Kaspersky,
# example 1007 (a hardware fault) is labelled "Sales and Pre-Sales", and "IT Support" has
# no example - confirm these few-shot examples are intentional.
prompt = ChatPromptTemplate.from_messages([
    ("system", """Classify IT support tickets into JSON with this structure:
        {{
            "department": "Technical Support, Customer Service, Billing and Payments, Product Support, IT Support, Returns and Exchanges, Sales and Pre-Sales, Human Resources, Service Outages and Maintenance, General Inquiry",

        }}
        Examples:
        [
    {{
        "ticket_ID": 1001,
        "ticket_subject": "Discrepancia de facturación en Google Workspace",
        "ticket_body": "Monto de facturación incorrecto en mi suscripción de Google Workspace Business Standard. Por favor, revise y ajuste la factura. Gracias.",
        "department": "Billing and Payments"
    }},
    {{
        "ticket_ID": 1002,
        "ticket_subject": "Urgent Consultation Request for Critical IT Issues",
        "ticket_body": "Experiencing critical server issues impacting operations. Urgently need your assistance. Contact via email or phone at <tel_num>. Thank you.",
        "department": "Customer Service"
    }},
    {{
        "ticket_ID": 1003,
        "ticket_subject": "Consulta sobre Servicios de Consultoría en TI",
        "ticket_body": "Interesado en información sobre sus Servicios de Consultoría en TI, especialmente en desarrollo de software y administración de servidores. Gracias.",
        "department": "General Inquiry"
    }},
    {{
        "ticket_ID": 1004,
        "ticket_subject": "Demande de mise à jour des dossiers",
        "ticket_body": "Demande de mise à jour de notre système de gestion des dossiers des employés pour améliorer l'efficacité et l'intégration avec notre infrastructure informatique. Merci.",
        "department": "Human Resources"
    }},
    {{
        "ticket_ID": 1005,
        "ticket_subject": "Issues with Slack connection affecting team communication today",
        "ticket_body": "Challenges activating Kaspersky Internet Security 2024. Need assistance. Contact via <tel_num> or <email>. Thank you.",
        "department": "Product Support"
    }},
    {{
        "ticket_ID": 1006,
        "ticket_subject": "Defective Dell XPS 13 9310",
        "ticket_body": "Received Dell XPS 13 9310 with defective display. Please arrange an exchange. Order number: <order_num>. Contact via <email> or <tel_num>. Thanks.",
        "department": "Returns and Exchanges"
    }},
    {{
        "ticket_ID": 1007,
        "ticket_subject": "Touchscreen and Keyboard Issues with Surface Pro 7",
        "ticket_body": "Issues with touchscreen response and detachable keyboard on Surface Pro 7. Requesting assistance or replacement. Thank you.",
        "department": "Sales and Pre-Sales"
    }},
    {{
        "ticket_ID": 1008,
        "ticket_subject": "AWS-Serverausfall",
        "ticket_body": "Unerwarteter Ausfall unserer AWS-Server. Dringend Unterstützung erforderlich. Danke.",
        "department": "Service Outages and Maintenance"
    }},
    {{
        "ticket_ID": 1009,
        "ticket_subject": "Solicitud urgente de orientación y solución de problemas",
        "ticket_body": "Problemas críticos con integración de software y conectividad del servidor. Necesitamos asistencia urgente. Gracias.",
        "department": "Technical Support"
    }}
]

    """),
    ("user", "{input}")
])

# Pipe the prompt into the Groq model and the model's reply into the JSON parser.
# NOTE(review): the parser can only parse what the model returns; it presumably raises
# when the reply is not valid JSON - confirm whether the calling loop should handle that.
chain = prompt | llm | parser

def classify_ticket(ticket: dict) -> dict:
    """Predict one ticket's department with the Groq chain and record it.

    The ticket is serialised to JSON and sent through the module-level
    ``chain``. The parsed reply is printed, and its ``department`` value is
    stored in the module-level ``final_depertment_dict`` under the ticket's
    ``ticket_ID``.

    Args:
        ticket: Ticket record; must contain a ``ticket_ID`` key.

    Returns:
        The parsed classification returned by the chain.

    Raises:
        ValueError: If the chain's output is not a JSON object, so the
            ``department`` field cannot be read from it.
    """
    result = chain.invoke({"input": json.dumps(ticket)})
    print(json.dumps(result, indent=2))

    # Valid JSON is not necessarily an object (it could be a list or a string);
    # fail with a message that names the ticket instead of an AttributeError on .get().
    if not isinstance(result, dict):
        raise ValueError(
            f"Expected a JSON object for ticket {ticket['ticket_ID']!r}, "
            f"got {type(result).__name__}"
        )

    final_depertment_dict[ticket['ticket_ID']] = result.get("department")
    return result

# Classify at most the first 500 tickets. Slicing (rather than indexing 0..499)
# avoids an IndexError when the data set holds fewer than 500 records.
for ticket in test_json[:500]:
    print(ticket)
    classify_ticket(ticket)
    # Pause for 15 seconds between tickets, as in the first Groq pass.
    time.sleep(15)

In [None]:
import pandas as pd

# Load the submission that already carries the updated type and priority columns.
df = pd.read_csv('submission_updated.csv')

# Override the 'department' column with the values from final_depertment_dict.
# The ID column in the CSV is named 'ticket_ID'; looking up 'ticketid' raised a KeyError.
# Ticket IDs missing from the dict become NaN in the mapped column.
df['department'] = df['ticket_ID'].map(final_depertment_dict)

# Save the result as the final submission file.
df.to_csv('final_submission.csv', index=False)

print("The 'department' column has been successfully updated and saved to final_submission.csv.")
