# Deleting logos

In [272]:
from PIL import Image
import imagehash
import os
from collections import defaultdict

def get_image_hash(image_path):
    """
    Calculate the perceptual (average) hash of an image.

    The image is opened inside a ``with`` block so its file handle is released
    as soon as the hash has been computed. The scan can touch many files, and
    a later cell removes some of them with os.remove, so handles must not be
    left open.

    Args:
        image_path: Path of the image file to hash.

    Returns:
        The hash rendered as a string, or None if the file could not be
        opened/hashed (the error is printed, not raised).
    """
    try:
        with Image.open(image_path) as image:
            return str(imagehash.average_hash(image))
    except Exception as e:
        # Best-effort scan: report the unreadable file and keep going.
        print(f"Error processing {image_path}: {str(e)}")
        return None

def find_similar_images(root_dir):
    """
    Group the images found under ``root_dir`` by their perceptual hash.

    Every file below ``root_dir`` whose extension is a recognised image
    extension is hashed with get_image_hash; files that cannot be hashed are
    ignored.

    Returns:
        A plain dict mapping hash -> list of image paths, containing only the
        hashes shared by more than one file.
    """
    recognised_suffixes = ('.jpg', '.jpeg', '.png', '.bmp', '.gif', '.tiff')
    paths_by_hash = defaultdict(list)

    for folder, _subfolders, names in os.walk(root_dir):
        for name in names:
            _stem, suffix = os.path.splitext(name)
            if suffix.lower() not in recognised_suffixes:
                continue

            full_path = os.path.join(folder, name)
            digest = get_image_hash(full_path)
            if not digest:
                continue
            paths_by_hash[digest].append(full_path)

    # Keep only the hashes that occur on at least two files.
    repeated = {}
    for digest, members in paths_by_hash.items():
        if len(members) > 1:
            repeated[digest] = members
    return repeated

def print_duplicate_groups(duplicates):
    """
    Pretty-print the duplicate groups, one numbered group per hash.

    ``duplicates`` maps hash -> list of paths; only the paths are shown.
    Prints a "nothing found" message when the mapping is empty.
    """
    if duplicates:
        print(f"\nFound {len(duplicates)} groups of similar images:")
        group_number = 0
        for members in duplicates.values():
            group_number += 1
            print(f"\nGroup {group_number}:")
            for member in members:
                print(f"  - {member}")
    else:
        print("No duplicate images found.")

def main(directory=r"C:\Users\v-samomin\Downloads\PST\attachments"):
    """
    Scan ``directory`` for groups of images that share a perceptual hash.

    Args:
        directory: Root folder to scan. Defaults to the attachments folder
            this notebook was written against, so ``main()`` behaves as
            before; pass another path to scan a different folder.

    Returns:
        The dict produced by find_similar_images (hash -> list of paths).
    """
    print("Scanning for similar images...")
    duplicate_groups = find_similar_images(directory)
    #print_duplicate_groups(duplicate_groups)

    # Return the dictionary for further processing if needed
    return duplicate_groups

# Run the scan and keep the result in the notebook-level name `duplicate_groups`;
# the logo-deletion loop further down reads it.
if __name__ == "__main__":
    duplicate_groups = main()

Scanning for similar images...




In [166]:
import itertools
# Known perceptual hashes grouped by what the image shows. The values are
# compared against the hash strings that key `duplicate_groups`, i.e. the
# output of get_image_hash.
image_classification = {
    "ms_logo": ["3f3f0e0048003f7f", "7f3f0e0040003f7f", "ff3f0e0000213fff", "ffff3f01033fffff", "d0404040c0c0c0d9",
                "ff3f1d01013fffff", "ff3f0e0000207fff", "ff3f0e0000203fff","d2c8c0c0c0c0d2c0", "3f3f0e0048407f7f",
                "3f3f0e0048007f7f", "ff3f2f0000213fff","3f3f0e0000203fff", "ff3f0f0000213fff", "c0c0d1dff6dfc0c0",
                "c0c0d1bff7ffc0c0", ],
    "ms_word_logo": ["3c3e0f6f7f0f1f3f"],
    "ms_holo_lens_logo": ["ffff7f0100007fff", "ff2e203f3f27207f", "6460606063606060",],
    "windchill_product_access": ['1f0f01ff010f0f1f', "ffff2fffff1f0303", "ffb39f9f8000ffff", "00007f7f7f7f7f7f",],
    "ms_surface_logo": ['000080c0ffffff80', "3f20203f3f33216b","0000d0ffbfff0000", "8080c0fbf3ff8080", "ffffffd580c0c0df",
                        "c7d7c3c3c7ffff00", "ffff8080c0c0dfff", "8080c0fbf3ffc180"],
    "creo_management_tool_access": ["00fc1fffe0fffff9", "f8989885f73b3bf9", "ff181818181819ff", "02027e3ffffff000",
                    "ff00000000070f07","c1e383c000000383","ff80808080000080","0e3e02027e7e7e78","000080ffffffbf3f",
                    "81c3c38101a183ff", "ff001c1c1c1f3e10", "000080ffffffbfbf", "81c3c381018103ff", "ff001c1c1c3f3f10",
                    "000717878fffbffe", "003f0f0f276767ff", "00ffffffffffffff", "3c00005aff00ffff", "efffc1e7c1ffc0c7",
                    "ff001c1c1c1f3f10", ],
    "teams_logo": ["f7e406000000c1f7", "3f0f070707073fff", "7f7f4747670303bf"],
    "linkedin_logo": ["0060607e7e767600"],
    "xbox_logo": ["18183c3c3c3c1818"],
    "other_logo": ["0000007f7f7f0000", "df8faf3f3faf8f9f", 'ff9f803f301016ff', "00127f7f233fffee", "207050ffc7577f20",
                   "0707070707070707", "006070df7f200000", "00303e3f7f360000", "fe0001e3f7f7efef", "e0e0e0e0e0fefffb",
                   "1f1f1f1f5f07400f", "006070df7f600000", "1f1f1f1f5f0f001f"],
    "user_logo": ["3c72ebe7f7e30800"],
    "revert_3d_part": ["3f3f3f200f0108ff", "3f2f6f080fd148ff", "7f3f3f7f0f0700ff", "7f7f7f1c1cfe0000"]     
}

# Categories of image_classification that are not logos and therefore must
# never end up in the deletion list.
_NON_LOGO_CATEGORIES = frozenset({
    "windchill_product_access",
    "creo_management_tool_access",
    "revert_3d_part",
})

# logo_dict used to be a hand-copied subset of image_classification; deriving
# it keeps the two tables in sync when hashes are added. Lists are copied so
# that editing one table does not silently change the other.
logo_dict = {
    category: list(hashes)
    for category, hashes in image_classification.items()
    if category not in _NON_LOGO_CATEGORIES
}

# Flat list of every known hash (all categories).
process_list = list(itertools.chain.from_iterable(image_classification.values()))

# Flat list of the logo hashes only; images with these hashes get deleted.
logo_list = list(itertools.chain.from_iterable(logo_dict.values()))

def delete_files(path_list):
    """
    Remove every file in ``path_list`` from disk, printing one status line per path.

    Failures (missing file, permission problem, anything else) are reported
    and do not stop the remaining deletions.
    """
    for target in path_list:
        try:
            os.remove(target)
        except FileNotFoundError:
            outcome = f"File not found: {target}"
        except PermissionError:
            outcome = f"Permission denied: {target}"
        except Exception as err:
            outcome = f"Error deleting {target}: {err}"
        else:
            outcome = f"Deleted: {target}"
        print(outcome)


In [None]:
# Permanently delete every image whose hash is a known logo hash.
# Only hashes present in `duplicate_groups` are considered; find_similar_images
# keeps a hash only when at least two files share it.
logo_hashes = set(logo_list)  # set lookup instead of scanning the list per hash
c = 0  # number of files handed to delete_files
for image_hash in list(duplicate_groups.keys()):  # not `hash`: avoid shadowing the builtin
    if image_hash in logo_hashes:
        paths = duplicate_groups[image_hash]
        c = c + len(paths)
        delete_files(paths)

# Classifications

In [15]:
from datetime import datetime
import json
import re
import difflib 


def custom_encoder(obj):
    """
    ``default=`` hook for json.dump/json.dumps.

    Serialises datetime objects as ISO 8601 strings; any other type is
    rejected with TypeError.
    """
    if not isinstance(obj, datetime):
        raise TypeError(f"Object of type {type(obj)} is not JSON serializable")
    return obj.isoformat()

def custom_decoder(dct):
    """
    ``object_hook=`` for json.load/json.loads.

    If the decoded object has a string under the key 'time_py', that value is
    replaced in place by the datetime parsed from it; values that are not
    valid ISO strings are left untouched. No other key is converted.

    Returns the same dict object it was given.
    """
    stamp = dct.get('time_py')
    if isinstance(stamp, str):
        try:
            dct['time_py'] = datetime.fromisoformat(stamp)
        except ValueError:
            pass  # not an ISO timestamp: keep the original string
    return dct



def is_similar(str1, str2, threshold=0.95):
    """Return True when the difflib similarity ratio of the two strings reaches ``threshold``."""
    matcher = difflib.SequenceMatcher(isjunk=None, a=str1, b=str2)
    return matcher.ratio() >= threshold
  
def find_matching_cc(emails, available_resolver_group):
    """
    Find the resolver group addressed (cc or to) by an email thread.

    The thread is walked from its last element to its first. For each
    recipient:
      * if the recipient has an email address, it must equal a resolver's
        address, compared case-insensitively;
      * if the address is missing (None or empty), the recipient's name is
        compared to the resolver's name with is_similar.

    Args:
        emails: List of email dicts; empty dicts are skipped. Each may carry
            'cc' and 'to' lists of {'name': ..., 'email': ...} dicts.
        available_resolver_group: List of {'name': ..., 'email': ...} dicts.

    Returns:
        The first matching entry of ``available_resolver_group`` (the original
        dict object), or None when nothing matches.
    """
    # Lower-case the resolver fields once instead of once per recipient.
    resolvers = [
        (resolver, (resolver.get('name') or '').lower(), (resolver.get('email') or '').lower())
        for resolver in available_resolver_group
    ]

    for email in reversed(emails):  # Start from the most recent email
        if not email:
            continue

        # `or []` tolerates a 'cc'/'to' key that is present but None.
        recipients = (email.get('cc') or []) + (email.get('to') or [])
        for recipient in recipients:
            cc_name = (recipient.get('name') or '').lower()
            # Previously the result of .lower() was discarded, so addresses
            # differing only in case never matched.
            cc_email = (recipient.get('email') or '').lower()

            for resolver, resolver_name, resolver_email in resolvers:
                if cc_email:
                    matched = cc_email == resolver_email
                else:
                    # No address: fall back to fuzzy name matching. An empty
                    # name is never matched.
                    matched = bool(cc_name) and is_similar(cc_name, resolver_name)
                if matched:
                    return resolver

    return None  # Return None if no matches found

In [213]:
# (display name, mailbox) of every resolver group a thread can be routed to.
_RESOLVER_CONTACTS = (
    ("PDMLINK_ADMIN", "PDMLINK_ADMIN@microsoft.com"),
    ("Devices SW Licenses", "SurfSWLic@microsoft.com"),
    ("Creo Help", "Creo_Help@microsoft.com"),
    ("DevicesHelp", "DevicesHelp@microsoft.com"),
    ("DES Tools and Automation Engg (Redmond)", "destasreredmond@microsoft.com"),
)

# Same structure as before: a list of {"name": ..., "email": ...} dicts.
available_resolver_group = [
    {"name": contact_name, "email": contact_email}
    for contact_name, contact_email in _RESOLVER_CONTACTS
]


input_file = r"C:\Users\v-samomin\Downloads\PST\data_cleaned_2.json"
# The file is read as JSON Lines: every non-empty line is parsed as one JSON
# document (the loop below treats each document as a list of email dicts).
with open(input_file, 'r') as file:
    email_list = [
        json.loads(line, object_hook=custom_decoder)
        for line in file
        if line.strip()  # a blank line would make json.loads raise
    ]

# Flatten every email thread into one record: a single text blob with the
# messages numbered in reversed-list order, the attachments that actually exist
# on disk, the thread's time span/subject and the resolver group it was sent to.

# Folder the attachment paths stored in the emails are relative to.
ATTACHMENT_ROOT = r"C:\Users\v-samomin\Downloads\PST"

# Email addresses are stripped from the text. The TLD class used to be
# `[A-Z|a-z]`, which also accepted a literal '|'.
EMAIL_ADDRESS_RE = re.compile(r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b')

email_dict = []  # NOTE: a list of record dicts, despite the name
for emails in email_list:
    email_str = ""
    email_chain = emails[::-1]
    attachment = []
    for i, email in enumerate(email_chain):
        if email == {}:
            continue  # placeholder entry: keeps its position number but adds no text
        email_str = email_str + f"**email: {i+1}** \n\n {email['email_content']} \n\n\n"
        for attachment_path in email['attachment'] or []:
            # Keep only attachments that are really present on disk.
            if os.path.isfile(ATTACHMENT_ROOT + "\\" + attachment_path):
                attachment.append(attachment_path)

    if len(attachment) > 0:
        email_str = email_str + "\n\n\n Please refer the attachaed images which are the attachments to the above email chain"
        # De-duplicate while keeping first-seen order (set() gave an order
        # that could change between runs).
        attachment = list(dict.fromkeys(attachment))
    else:
        attachment = None

    # An empty thread yields None fields instead of raising IndexError.
    first_email = email_chain[0] if email_chain else {}
    last_email = email_chain[-1] if email_chain else {}
    email_start_time = first_email.get('time_py', None)
    email_end_time = last_email.get('time_py', None)
    email_subject = last_email.get('subject', None)

    resolver_group = find_matching_cc(email_chain, available_resolver_group)

    email_dict.append({
        "start_time": email_start_time,
        "end_time" : email_end_time,
        "subject": email_subject,
        "resolver_group": resolver_group,
        "email_content": EMAIL_ADDRESS_RE.sub('', email_str),
        "attachment": attachment
    })




In [None]:
# with open(r"C:\Users\v-samomin\Downloads\PST\data_all_in_one__.json", 'w') as file:
#     json.dump(email_dict__, file, default=custom_encoder, indent=4)

In [261]:
# Load the records from data_all_in_one.json; this rebinds `email_dict`,
# replacing whatever an earlier cell stored under that name.
# custom_decoder only converts values stored under the key 'time_py'.
with open(r"C:\Users\v-samomin\Downloads\PST\data_all_in_one.json", 'r') as file:
    email_dict = json.load(file, object_hook=custom_decoder)

In [265]:
# Load the records that were already processed. The extraction loop below
# appends to this list and rewrites the same file after every record.
with open(r"C:\Users\v-samomin\Desktop\git\sample-app-aoai-chatGPT\backend\email\processed_emails.json", 'r') as file:
    prcossed_email = json.load(file, object_hook=custom_decoder)

In [274]:
# Number of records processed so far.
len(prcossed_email)

1100

In [None]:
from openai import AzureOpenAI
from azure.identity import (
    DefaultAzureCredential,
    get_bearer_token_provider
)
import base64
import instructor
from pydantic import BaseModel, Field
from typing import Optional




def encode_image(image_path):
  """Read the file at ``image_path`` and return its bytes base64-encoded as a str."""
  with open(image_path, mode="rb") as handle:
    raw_bytes = handle.read()
  encoded = base64.b64encode(raw_bytes)
  return str(encoded, 'utf-8')
  

def get_user_content(email_chain, attachment_root=r"C:\Users\v-samomin\Downloads\PST"):
  """
  Build the ``content`` of the user chat message for one flattened thread.

  Args:
    email_chain: Record with the keys 'subject', 'email_content' and
      'attachment' (a list of paths relative to ``attachment_root``, or a
      falsy value when there is none).
    attachment_root: Folder the attachment paths are relative to. Defaults to
      the folder this notebook was written against.

  Returns:
    The plain text ("Subject: ...\\n <content>") when there are no
    attachments; otherwise a list whose first item is that text part and whose
    remaining items are image parts carrying each attachment as a base64
    data URL.
  """
  full_email = f"Subject: {email_chain['subject']}\n {email_chain['email_content']}"

  if not email_chain['attachment']:
    return full_email

  # File extension -> media subtype where the two differ. "image/jpg" is not a
  # registered media type; the registered one is "image/jpeg".
  subtype_by_extension = {"jpg": "jpeg", "jpe": "jpeg", "tif": "tiff"}

  user_content = [{"type": "text",
                   "text": full_email}]

  for image in email_chain['attachment']:
      image_path = attachment_root + "\\" + image
      extension = image.split('.')[-1].lower()  # ".PNG" and ".png" are the same format
      img_format = subtype_by_extension.get(extension, extension)
      base64_image = encode_image(image_path)
      user_content.append(
        {
        "type": "image_url",
        "image_url": {
          "url":  f"data:image/{img_format};base64,{base64_image}"
        },
      }
      )
  return user_content
   
  
# Structured result requested from the model for one email thread; passed as
# `response_model` in the extraction loop below. All three fields are optional
# strings defaulting to None.
class EmailInfo(BaseModel):
    # The issue raised in the thread.
    issue: Optional[str] = Field(None,description="Extract issue mentioned in the mail chain.")
    # The resolution steps/method found in the thread.
    resolution: Optional[str] = Field(None, description="Extract the resolution steps or method mentioned in the mail chain")
    # Requested as 'YES'/'NO' by the description, but typed as a free string.
    resolved: Optional[str] = Field(None, description="If issue is resolved in the mail chain then this will be 'YES', else 'NO'")

# System message sent with every extraction request in the loop below.
# The text is part of the program's behaviour: editing it changes model output.
# NOTE(review): the f-prefix is unnecessary here, the string has no placeholders.
system_prompt = f"""
You are an expert AI assistant specializing in the analysis of email chains.

Your task is to extract a precise issue/query and its resolution if they are mentioned in the provided email chain. The email chain may contain errors or issues faced by the user.

While extracting the issue and resolution, do not mention anything about the user or who resolved it; simply summarize it as "Issue" and "Resolution" without discussing or mentioning anything else.

You may receive attached images; analyze them and create a clear and comprehensive query/issue and resolution while ensuring no one has access to the images.

Always include all error messages, error codes/numbers, requests, issues, and critical details from both the images and the email chain that are necessary for the resolution. Present the issue and resolution in a way that can be directly displayed on a forum.

If no issue is mentioned, respond with 'no issue in the email chain'; also consider access requests as an issue.

If no resolution steps or methods are mentioned, respond with 'no resolutions methods are mentioned in the email chain'.

If the email states that the issue/query is resolved, include "Issue has been resolved" at the end of the response. If the issue is not resolved, add "Issue is not yet resolved" at the end.

Note: Always response as 1st person while writing the issue and its resolution, e.g, user forgot password and they did sent email, instead response like this "Forgot Password and sent email"
"""


##1100 jail break
# Processing resumes at index 1101, so the record at index 1100 is never sent.
# NOTE(review): presumably that record was skipped on purpose (see the note
# above) - confirm before changing the index.
RESUME_INDEX = 1101
PROCESSED_EMAILS_PATH = r"C:\Users\v-samomin\Desktop\git\sample-app-aoai-chatGPT\backend\email\processed_emails.json"

# Credential, token provider and client are created once and the credential
# stays open while requests are made. Previously they were rebuilt for every
# record and the credential was closed before the token provider was used.
with DefaultAzureCredential() as credential:
    ad_token_provider = get_bearer_token_provider(
        credential,
        "https://cognitiveservices.azure.com/.default"
    )

    client = instructor.from_openai(AzureOpenAI(
        api_version="2024-05-01-preview",
        api_key=None,
        azure_ad_token_provider=ad_token_provider,
        azure_endpoint="https://ssaopenaiservices.openai.azure.com/",
    ))

    for email_chain in email_dict[RESUME_INDEX:]:
        user_content = get_user_content(email_chain)

        response = client.chat.completions.create(
            model="ssagpt4o",
            response_model=EmailInfo,
            messages=[{"role": "system", "content": system_prompt},
                      {"role": "user", "content": user_content}],
            temperature = 0.075)

        # The record is annotated in place and collected.
        email_chain['issue'] = response.issue
        email_chain['resolution'] = response.resolution
        email_chain['resolved'] = response.resolved
        prcossed_email.append(email_chain)

        # Checkpoint after every record so a failure loses at most one result.
        with open(PROCESSED_EMAILS_PATH, 'w') as file:
            json.dump(prcossed_email, file, default=custom_encoder, indent=4)

   


In [275]:
# Show the record the extraction loop was last working on.
email_chain

{'start_time': '2023-06-12T10:09:00',
 'end_time': '2023-06-16T05:26:00',
 'subject': 'RE: Creo license fail to load',
 'resolver_group': {'name': 'PDMLINK_ADMIN',
  'email': 'PDMLINK_ADMIN@microsoft.com'},
 'email_content': '**email: 1** \n\n Hi PDMLink Admin,\nMy name is Jessica, and I have an issue with opening up Creo in the past two days. I didn\'t have this issue before, but right now it is showing the following error message, and then the program would close automatically. Can you help me with this issue? Thank you!\nBest,\nJessica \n\n\n**email: 2** \n\n Hello Jessica\nHappy Monday\nA quick check, could you please confirm you are connected to MSFT VPN\nThank You\nRavi A \n\n\n**email: 3** \n\n Yes, I am! \n\n\n**email: 4** \n\n Hello@Devices SW LicensesTeam\nHappy Monday\nCould you please confirm Jessica is added to the group to use Creo License.\nThank You\nRavi A \n\n\n**email: 5** \n\n Hi Ravi,\nSure, we are validating the permissions and will provide an update shortly.\nReg

In [7]:
from datetime import datetime
import json
import re


def custom_encoder(obj):
    """
    ``default=`` hook for json.dump/json.dumps.

    Serialises datetime objects as ISO 8601 strings; any other type is
    rejected with TypeError.
    """
    if not isinstance(obj, datetime):
        raise TypeError(f"Object of type {type(obj)} is not JSON serializable")
    return obj.isoformat()

def custom_decoder(dct):
    """
    ``object_hook=`` for json.load/json.loads.

    If the decoded object has a string under the key 'time_py', that value is
    replaced in place by the datetime parsed from it; values that are not
    valid ISO strings are left untouched. No other key is converted.

    Returns the same dict object it was given.
    """
    stamp = dct.get('time_py')
    if isinstance(stamp, str):
        try:
            dct['time_py'] = datetime.fromisoformat(stamp)
        except ValueError:
            pass  # not an ISO timestamp: keep the original string
    return dct

# Reload the records written by the extraction step; these are the input of the
# classification below. custom_decoder only converts values keyed 'time_py'.
with open(r"C:\Users\v-samomin\Desktop\git\sample-app-aoai-chatGPT\backend\email\processed_emails.json", 'r') as file:
    prcossed_email = json.load(file, object_hook=custom_decoder)

In [None]:
# from typing import List
# from pydantic import BaseModel, Field
# from typing import Literal, Optional
# from openai import AzureOpenAI
# from azure.identity import (
#     DefaultAzureCredential,
#     get_bearer_token_provider
# )
# import base64
# import instructor

# class MultiClassPrediction(BaseModel):

#     resolution_method: Optional[str] = Field(
#         ...,
#         description="Mentioned resolution steps or methods to resolve an issue.",
#     )

#     class_labels: List[Literal["PDMLink", "License", "Assist", "Creo"]] = Field(
#         ...,
#         description="The predicted class labels from the given issue, resolution and subject.",
#     )

# system_prompt = f"""
# You are an expert AI assistant specializing in classifying issues and extracting relevant resolution steps or methods.

# **Your Task:** Classify each provided issue into one of the following categories and extract any resolution methods if mentioned.

# - **Categories:**
#   - **PDMLink**: Issues related to PDMLink, OnePDM, or Windchill. Examples include: inability to access these tools or errors related to them.
#   - **License**: Issues related to licensing, such as an inability to start a session, fetch a license, or other license requirements.
#   - **Assist**: Issues requiring assistance in completing a task, such as difficulty installing, accessing certain functionalities, or understanding usage.
#   - **Creo**: Issues related to the Creo tool, such as requests for help with 3D data, surfacing, reverse engineering, or import/export functions.

# **Resolution Steps Extraction:**
# - Resolution steps refer to methods or specific steps to resolve each issue e.g, by following this steps issues can be resolved, I've resolved this doing this and that.
#   - For **Access** and **License** issues, resolution steps are not applicable, so mark these as "None."
#   - If no resolution steps are mentioned, mark them as "None."
# """


# ##1100 jail break

# classified_email = []

# for email_chain in prcossed_email:
#     if email_chain['issue']:
#         user_content = f"**Subject:** {email_chain['subject']} \n\n **Issue:** {email_chain['issue']}"
#         if email_chain['resolution']:
#             user_content = user_content + f"\n\n **Resolution:** {email_chain['resolution']}"


#         with DefaultAzureCredential() as credential:
#             ad_token_provider = get_bearer_token_provider(
#                 credential,
#                 "https://cognitiveservices.azure.com/.default"
#             )

#         client = instructor.from_openai(AzureOpenAI(
#                     api_version="2024-05-01-preview",
#                     api_key=None,
#                     azure_ad_token_provider=ad_token_provider,
#                     azure_endpoint="https://ssaopenaiservices.openai.azure.com/",
#                 ))
        
#         response = client.chat.completions.create(
#             model="ssagpt4o",
#             response_model=MultiClassPrediction,
#             messages=[{"role": "system", "content": system_prompt},
#                     {"role": "user", "content": user_content}],
#             temperature = 0.075)
        
#         class_labels = response.class_labels
#         resolution_method = response.resolution_method

#     else:
#         class_labels = None
#         resolution_method = None



    

#     email_chain['theme'] = class_labels
#     email_chain['resolution_method'] = resolution_method

#     classified_email.append(email_chain)

#     with open(r"C:\Users\v-samomin\Desktop\git\sample-app-aoai-chatGPT\backend\email\classified_emails.json", 'r') as file:
#         classified_email = json.load(file, object_hook=custom_decoder)


   
   


In [8]:
# Number of records available for classification.
len(prcossed_email)

1307

In [10]:
import json
from concurrent.futures import ThreadPoolExecutor, as_completed
from typing import List
from pydantic import BaseModel, Field
from typing import Literal, Optional
import traceback

from openai import AzureOpenAI
from azure.identity import (
    DefaultAzureCredential,
    get_bearer_token_provider
)
import instructor

# Structured result requested from the model for one record; passed as
# `response_model` by process_email. All three fields are required (`...`).
class MultiClassPrediction(BaseModel):

    # Free-text resolution steps; the value may be None, but the field itself
    # must be present in the response.
    resolution_method: Optional[str] = Field(
        ...,
        description="Mentioned resolution steps or methods to resolve an issue.",
    )

    # Exactly one top-level category (stored by process_email as 'theme').
    class_labels: Literal["PDMLink", "License", "Assist", "Creo"] = Field(
        ...,
        description="The predicted class labels from the given issue, resolution and subject.",
    )

    # Exactly one sub-category (stored by process_email as 'category').
    sub_class_labels: Literal["access", "onePDM", "session", "fetch_license", "install", "functionality", "3D", "help_in"] = Field(
        ...,
        description="The predicted sub class labels from the given issue, resolution and subject.",
    ) 

# System message sent with every classification request made by process_email.
# The text is part of the program's behaviour: editing it changes model output.
# NOTE(review): the resolution rules mention "**Access**" as if it were a
#   category, but "access" is only declared as a sub-category - confirm intent.
# NOTE(review): the help_in sub-category says "surfing" while the Creo category
#   says "surfacing" - likely the same thing; confirm before editing the prompt.
# NOTE(review): the f-prefix is unnecessary here, the string has no placeholders.
system_prompt = f"""
You are an expert AI assistant specializing in classifying issues and extracting relevant resolution steps or methods.

**Your Task:** Classify each provided issue into one of the following categories, sub-categories and extract any resolution methods if mentioned.

- **Categories:**
  - **PDMLink**: Issues related to PDMLink, OnePDM, or Windchill. Examples include: inability to access these tools or errors related to them.
  - **License**: Issues related to licensing, such as an inability to start a session, fetch a license, or other license requirements.
  - **Assist**: Issues requiring assistance in completing a task, such as difficulty installing, accessing certain functionalities, or understanding usage.
  - **Creo**: Issues related to the Creo tool, such as requests for help with 3D data, surfacing, reverse engineering, or import/export functions.

- **Sub-Categories:**
  - **access**: Issues related to access
  - **onePDM**: Issues related to error in onePDM, PDMLink or Windchill.
  - **session**: Issues related to unable to start session
  - **fetch_license**:Issues related to unable fetch license
  - **install**: Issues related to unable to install
  - **functionality**: Issues related to unable to obtain certain functionality
  - **3D**: When help on 3D data is required
  - **help_in**: when help in surfing, reverse engineering, import-export etc., required or requested.

- **Resolution Steps Extraction:**
  - Resolution steps refer to methods or specific steps to resolve each issue e.g, by following this steps issues can be resolved, I've resolved this doing this and that.
  - For **Access** and **License** related issues, resolution steps are not applicable, so mark these as "None" even if resolution were provided.
  - If no resolution steps are mentioned, mark them as "None."
  - If the given resolution have or mentioned access request, email forward and sent etc, in this cases resolution steps are "None".
  - Do not include any user's name in the Resolution Steps.
"""



def process_email(email_chain):
    """
    Classify one processed record and annotate it in place.

    When the record has a non-empty 'issue', its subject, issue and (if
    present) resolution are sent to the model; the answer is stored under
    'theme', 'category' and 'resolution_method'. Records without an issue get
    None for all three.

    Any exception is caught: the traceback is printed, the message is stored
    under 'error' and the three result keys are set to None, so a single bad
    record never aborts a batch.

    Args:
        email_chain: Record dict with at least 'issue', 'subject' and
            'resolution'.

    Returns:
        The same dict object, annotated.
    """
    try:
        if email_chain['issue']:
            user_content = f"**Subject:** {email_chain['subject']} \n\n **Issue:** {email_chain['issue']}"
            if email_chain['resolution']:
                user_content += f"\n\n **Resolution:** {email_chain['resolution']}"

            # The request is made while the credential is still open.
            # Previously the `with` block ended (closing the credential)
            # before the token provider built from it was ever called.
            with DefaultAzureCredential() as credential:
                ad_token_provider = get_bearer_token_provider(
                    credential,
                    "https://cognitiveservices.azure.com/.default"
                )

                client = instructor.from_openai(AzureOpenAI(
                    api_version="2024-05-01-preview",
                    api_key=None,
                    azure_ad_token_provider=ad_token_provider,
                    azure_endpoint="https://ssaopenaiservices.openai.azure.com/",
                ))

                # Generate response
                response = client.chat.completions.create(
                    model="ssagpt4o",
                    response_model=MultiClassPrediction,
                    messages=[{"role": "system", "content": system_prompt},
                            {"role": "user", "content": user_content}],
                    temperature=0.05
                )

            # Extract classifications
            email_chain['theme'] = response.class_labels
            email_chain['category'] = response.sub_class_labels
            email_chain['resolution_method'] = response.resolution_method
        else:
            email_chain['theme'] = None
            email_chain['category'] = None
            email_chain['resolution_method'] = None

    except Exception as e:
        print(f"Error processing email with subject: {email_chain.get('subject', 'Unknown')}")
        print(traceback.format_exc())
        # Keep the failure on the record so it can be reviewed/retried later.
        email_chain['error'] = str(e)
        email_chain['theme'] = None
        email_chain['category'] = None
        email_chain['resolution_method'] = None

    return email_chain


def classify_emails_parallel(processed_emails, processor=None, max_workers=None):
    """
    Classify records concurrently in a thread pool.

    Results are returned in the same order as ``processed_emails``.
    Previously they were collected in completion order, so the output order
    changed from run to run.

    Args:
        processed_emails: Iterable of records to classify.
        processor: Callable applied to each record. Defaults to process_email.
        max_workers: Thread-pool size; None keeps ThreadPoolExecutor's default.

    Returns:
        List with one result per input record, in input order.
    """
    if processor is None:
        processor = process_email

    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        # Executor.map yields results in submission order.
        return list(executor.map(processor, processed_emails))



# Run the classification in parallel
classified_emails = classify_emails_parallel(prcossed_email)

# Save classified emails back to JSON if needed
with open(r"C:\Users\v-samomin\Desktop\git\sample-app-aoai-chatGPT\backend\email\classified_emails.json", 'w') as file:
    # default=custom_encoder, as in the other dumps of this notebook: records
    # loaded through custom_decoder may hold datetime values, which json
    # cannot serialise on its own.
    json.dump(classified_emails, file, default=custom_encoder, indent=2)


Error processing email with subject: RE: [Assist Tool][Creo]: Need help in Creo/Pdm link
Traceback (most recent call last):
  File "c:\Users\v-samomin\AppData\Local\Programs\Python\Python310\lib\site-packages\instructor\retry.py", line 180, in retry_sync
    response = func(*args, **kwargs)
  File "c:\Users\v-samomin\AppData\Local\Programs\Python\Python310\lib\site-packages\openai\_utils\_utils.py", line 274, in wrapper
    return func(*args, **kwargs)
  File "c:\Users\v-samomin\AppData\Local\Programs\Python\Python310\lib\site-packages\openai\resources\chat\completions.py", line 679, in create
    return self._post(
  File "c:\Users\v-samomin\AppData\Local\Programs\Python\Python310\lib\site-packages\openai\_base_client.py", line 1260, in post
    return cast(ResponseT, self.request(cast_to, opts, stream=stream, stream_cls=stream_cls))
  File "c:\Users\v-samomin\AppData\Local\Programs\Python\Python310\lib\site-packages\openai\_base_client.py", line 937, in request
    return self._reque

In [None]:
# with open(r"C:\Users\v-samomin\Desktop\git\sample-app-aoai-chatGPT\backend\email\classified_emails.json", 'r') as file:
#         classified_email_ = json.load(file, object_hook=custom_decoder)

# classified_email_cleaned = []
# for email_chain in classified_email_:
#     if email_chain['issue']:
#         print("all good")
#     else:
#         email_chain['resolver_group'] = None
#     classified_email_cleaned.append(email_chain)

# with open(r"C:\Users\v-samomin\Desktop\git\sample-app-aoai-chatGPT\backend\email\classified_emails_cleaned__.json", 'w') as file:
#     json.dump(classified_email_cleaned__, file, indent=2)
    

In [17]:
# Load the cleaned classification results used by the analysis cells below.
# NOTE(review): no active cell in this notebook writes
# classified_emails_cleaned.json - confirm how it is produced.
with open(r"C:\Users\v-samomin\Desktop\git\sample-app-aoai-chatGPT\backend\email\classified_emails_cleaned.json", 'r') as file:
        classified_email = json.load(file, object_hook=custom_decoder)

In [18]:
# Flatten every classified record into one row per record: the nested
# resolver group is reduced to its email address (None when there is no
# resolver group); all other fields are copied as they are.
formatted_data = []
for item in classified_email:
    resolver = item["resolver_group"]
    formatted_data.append(dict(
        start_time=item["start_time"],
        end_time=item["end_time"],
        subject=item["subject"],
        resolver_email=resolver["email"] if resolver else None,
        email_content=item["email_content"],
        attachment=item["attachment"],
        issue=item["issue"],
        resolution=item["resolution"],
        resolved=item["resolved"],
        theme=item["theme"],
        resolution_method=item["resolution_method"],
        category=item["category"],
    ))

In [19]:
import pandas as pd
# One row per classified record, one column per key of the formatted rows.
df = pd.DataFrame(formatted_data)

In [30]:
# Export the full table to CSV, without the index column.
df.to_csv(r"C:\Users\v-samomin\Downloads\email_classes.csv", index=False)

In [33]:
# Number of records per theme, most frequent first.
df['theme'].value_counts().reset_index(name='count')

Unnamed: 0,index,count
0,PDMLink,876
1,Assist,183
2,License,115
3,Creo,79


In [35]:
# Number of records per category (sub-class), most frequent first.
df['category'].value_counts().reset_index(name='count')

Unnamed: 0,index,count
0,access,731
1,onePDM,204
2,fetch_license,106
3,functionality,75
4,install,71
5,help_in,57
6,3D,5
7,session,4


In [28]:
# Number of records for every (theme, category) combination that occurs.
category_counts = df.groupby(['theme', 'category']).size().reset_index(name='count')

In [29]:
# Display the theme/category breakdown.
category_counts

Unnamed: 0,theme,category,count
0,Assist,3D,1
1,Assist,access,42
2,Assist,functionality,68
3,Assist,help_in,5
4,Assist,install,67
5,Creo,3D,4
6,Creo,access,12
7,Creo,functionality,7
8,Creo,help_in,52
9,Creo,install,2


In [36]:
# Convert start_time to datetime format
# NOTE: this overwrites the df['start_time'] column in place.
df['start_time'] = pd.to_datetime(df['start_time'])

# Group by date and count the number of entries per day
daily_counts = df.groupby(df['start_time'].dt.date).size()

# Calculate the average number of entries per day
# NOTE: daily_counts only has the dates that occur in the data, so this is the
# mean over days with at least one record, not over every calendar day.
daily_average = daily_counts.mean()

daily_average


3.996932515337423

In [37]:
# Records marked resolved that carry a resolution method, excluding the
# 'License' theme and the 'access'/'session' categories.
# Despite its name, resolved_count holds the filtered rows, not a number.
is_resolved = df['resolved'] == 'YES'
has_resolution_method = df['resolution_method'].notna()
is_license_theme = df['theme'].isin(['License'])
is_access_or_session = df['category'].isin(['access', 'session'])

resolved_count = df[
    is_resolved & has_resolution_method & ~is_license_theme & ~is_access_or_session
]#.shape[0]

In [39]:
# Number of rows that passed the filter above.
resolved_count.shape[0]

222

In [40]:
# Export the filtered rows to CSV, without the index column.
resolved_count.to_csv(r"C:\Users\v-samomin\Downloads\can_have_usful_email.csv", index=False)