<a href="https://colab.research.google.com/github/Saransh1329/BlackBox-Agentic-AI-for-Predictive-Maintenance/blob/main/ai5.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [3]:
import pandas as pd
import re
from google.colab import files
from collections import defaultdict

# Upload Excel file
print("Please upload your Excel file:")
uploaded = files.upload()

# files.upload() returns a dict keyed by filename. It can come back empty
# (e.g. when the upload dialog is dismissed), so fail with a clear message
# instead of the opaque IndexError that indexing an empty list would raise.
if not uploaded:
    raise ValueError("No file was uploaded. Re-run this cell and select an Excel workbook.")

# Only the first uploaded file is processed
filename = next(iter(uploaded))

# Open the workbook once; excel_file is reused below and by later steps
excel_file = pd.ExcelFile(filename)
print(f"\nWorksheets found: {excel_file.sheet_names}")

# Read the Feedback worksheet from the already-open workbook rather than
# re-opening the file from disk
feedback_df = excel_file.parse('Feedback')

print("\nOriginal Feedback data shape:", feedback_df.shape)
print("\nColumns in Feedback:", feedback_df.columns.tolist())

# Matches one parenthesised tag such as "(HIGH)" or "(MEDIUM)", together with
# any whitespace that immediately precedes it.
_BRACKETED_TAG = re.compile(r'\s*\([^)]*\)')


def clean_fault_name(fault):
    """Return the fault name with every parenthesised tag removed.

    Missing values (anything ``pd.isna`` reports as null) are returned
    unchanged. Any other value is converted to ``str``, has all
    ``(...)`` groups stripped, and is trimmed of surrounding whitespace.
    """
    if not pd.isna(fault):
        return _BRACKETED_TAG.sub('', str(fault)).strip()
    return fault

# Add a tag-free copy of the fault column so rows group by bare fault name
feedback_df['faults_detected_cleaned'] = feedback_df['faults_detected'].apply(clean_fault_name)


def _blank_fault_record():
    """Return a fresh, empty accumulator for a single fault name."""
    return {
        'no_of_faults': 0,
        'accurate_faults': 0,
        'vehicle_ids': [],
        'ratings': [],
        'predictions': [],
        'comments': []
    }


# Per-fault accumulators for the Manufacturer analysis, created on first access
fault_analysis = defaultdict(_blank_fault_record)

# Walk the feedback rows and fold each one into its fault's accumulator
for _, feedback_row in feedback_df.iterrows():
    fault_name = feedback_row['faults_detected_cleaned']

    # Rows with no recorded fault contribute nothing
    if pd.isna(fault_name):
        continue

    record = fault_analysis[fault_name]
    record['no_of_faults'] += 1
    record['vehicle_ids'].append(feedback_row['vehicle_id'])
    record['ratings'].append(feedback_row['Rating'])

    verdict = feedback_row['Prediction Correct(Yes)']
    record['predictions'].append(verdict)
    record['comments'].append(feedback_row['Additional Comments'])

    # A prediction counts as accurate only when the answer is "yes",
    # ignoring case and surrounding whitespace
    if str(verdict).strip().lower() == 'yes':
        record['accurate_faults'] += 1

# Create the Manufacturer dataframe: one row per fault with its total count
# and the count of predictions confirmed as correct
summary_columns = ['fault_detected', 'no_of_faults', 'accurate_faults']

manufacturer_data = [
    {
        'fault_detected': fault,
        'no_of_faults': data['no_of_faults'],
        'accurate_faults': data['accurate_faults'],
    }
    for fault, data in fault_analysis.items()
]

# Passing the columns explicitly keeps the frame well-formed when no faults
# were recorded; without it an empty list yields a column-less frame and the
# sort below raises KeyError.
manufacturer_df = pd.DataFrame(manufacturer_data, columns=summary_columns)

# Sort by number of faults (descending). A stable sort keeps the order of
# faults with equal counts deterministic.
manufacturer_df = manufacturer_df.sort_values(
    'no_of_faults', ascending=False, kind='mergesort'
).reset_index(drop=True)

print("\n" + "="*50)
print("Manufacturer Analysis Summary:")
print("="*50)
print(f"\nTotal unique faults: {len(manufacturer_df)}")
print("\nTop 5 most common faults:")
print(manufacturer_df[summary_columns].head())

# Save back to Excel with the new Manufacturer worksheet
output_filename = filename.rsplit('.', 1)[0] + '_with_manufacturer.xlsx'

with pd.ExcelWriter(output_filename, engine='openpyxl') as writer:
    # Copy the original worksheets from the already-open workbook.
    # A pre-existing 'Manufacturer' sheet (e.g. when a previously generated
    # workbook is uploaded again) is skipped: otherwise the fresh summary
    # would be written into that same sheet and rows from the old copy
    # could be left behind beneath it.
    for sheet_name in excel_file.sheet_names:
        if sheet_name == 'Manufacturer':
            continue
        df = excel_file.parse(sheet_name)
        df.to_excel(writer, sheet_name=sheet_name, index=False)

    # Add the new Manufacturer worksheet
    manufacturer_df.to_excel(writer, sheet_name='Manufacturer', index=False)

print(f"\n✓ File saved as: {output_filename}")
print(f"✓ New 'Manufacturer' worksheet created with {len(manufacturer_df)} rows")

# Download the file
files.download(output_filename)

print("\n✓ File downloaded successfully!")

Please upload your Excel file:


Saving scheduled_appointments (1) (2).xlsx to scheduled_appointments (1) (2) (2).xlsx

Worksheets found: ['Owner Details', 'Appointment', 'Failiure Prediction', 'Maintainence Records', 'Feedback']

Original Feedback data shape: (389, 5)

Columns in Feedback: ['vehicle_id', 'faults_detected', 'Rating', 'Prediction Correct(Yes)', 'Additional Comments']

Manufacturer Analysis Summary:

Total unique faults: 9

Top 5 most common faults:
  fault_detected  no_of_faults  accurate_faults
0       Air Flow           188              178
1   Exhaust Temp            47               43
2   Oil Pressure            45               43
3        Battery            32               31
4     Brake Temp            26               25

✓ File saved as: scheduled_appointments (1) (2) (2)_with_manufacturer.xlsx
✓ New 'Manufacturer' worksheet created with 9 rows


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>


✓ File downloaded successfully!
