In [10]:
#Import packages
import pandas as pd

# Create reference table - Codes of Reasons for Complaints

In [7]:
# Revised list of grouped categories based on the user's updated input
new_groups = [
    "Service Quality, Vehicle Reliability",
    "Car features",
    "Brand Loyalty Shift, Interior Features & Comfort, Customer Service, Dealership",
    "Vehicle Reliability, Service Quality",
    "Safety Concerns, Vehicle Performance, Customer Service",
    "Exterior Design, Interior Features & Comfort, Driving Performance",
    "Pricing Concerns, Brand Perception, Depreciation Concerns, Exterior Design, Technology Quality",
    "Electronic Reliability, System Integration, User Experience",
    "Maintenance Cost, Warranty Support, Customer Service",
    "Dealer Concerns, Material Quality, Resale Value, Driving Experience",
    "Vehicle Reliability, Service Quality, Brand Perception",
    "Feature Availability",
    "Feature Limitations, Fuel Cost Considerations",
    "Tire Quality Concerns, Ride Quality, Performance, Adaptive Cruise Control Limitations, Sensor Reliability, Technology Limitations, Customer Service",
    "Comfort & Ride Quality, Luxury Features, Technology & Safety",
    "Door Lock Functionality Concerns, Usability Issues with Technology, Camera Calibration, Design & Ride Quality"
]

# Split each new group by commas, clean up whitespace, and flatten the list
new_split_items = [item.strip() for group in new_groups for item in group.split(",") if item.strip()]

# Convert the split list to a DataFrame and drop duplicates
df_new_split = pd.DataFrame(new_split_items, columns=["Reason"]).drop_duplicates().reset_index(drop=True)

# Assign a numerical code to each unique reason
df_new_split['Code'] = df_new_split.index + 1  # Start codes from 1




In [9]:
# Save dataframe to CSV
df_new_split.to_csv('Reasons_of_complaints.csv', index=False)

# Encode reasons to matching numbers - code of reasons

In [27]:
df = pd.read_csv('Extracted data.csv')  # Replace 'your_file.csv' with the path to your file

In [29]:
# Drop columns that contain only NaN values - delete redundant columns copied from Excel
df = df.dropna(axis=1, how='all')

In [31]:
# Step 1: Flatten and extract unique reasons to assign codes
unique_reasons = set()
for reasons in df['Reason_complaint']:
    for reason in reasons.split(','):
        unique_reasons.add(reason.strip())

# Create a mapping of each reason to a unique code
reason_to_code = {reason: code for code, reason in enumerate(unique_reasons, start=1)}

# Step 2: Encode each row based on the reason_to_code mapping
def encode_reasons(reason_str):
    reasons = reason_str.split(',')
    return [reason_to_code[reason.strip()] for reason in reasons]

# Apply the encoding function to the column
df['Reason_Code'] = df['Reason_complaint'].apply(encode_reasons)

# Display the DataFrame with encoded values
df

Unnamed: 0,Date,Author_Name,Vehicle_Title,Review_Title,Review,Rating,Reason_complaint,Reason_Code
0,09/17/17,tim mcc,2017 Genesis G90 Sedan Premium 4dr Sedan AWD (...,dealership/repairs,my g90 has less than 200 miles in shop for tw...,1,"Service Quality, Vehicle Reliability","[27, 10]"
1,08/21/17,Richard N,2017 Genesis G90 Sedan Ultimate 4dr Sedan (5.0...,My car is getting lots of respect from Mercede...,"Somehow, the G90 looks perfect to my country ...",4,Car features,[25]
2,08/03/17,Rick,2017 Genesis G90 Sedan Premium 4dr Sedan AWD (...,Traded my Lexus LS for a Genesis G90!,I've owned Lexus cars exclusively for the pas...,1,"Brand Loyalty Shift, Interior Features & Comfo...","[12, 5, 39, 19]"
3,08/04/18,tim m,2018 Genesis G90 Sedan Ultimate 4dr Sedan AWD ...,avoid like the plague,bought first one owned for 10 weeks in the sh...,1,"Vehicle Reliability, Service Quality","[10, 27]"
4,07/02/18,Joe Margucci,2018 Genesis G90 Sedan Premium 4dr Sedan (3.3L...,"""Acceleration failure"" - Genesis phraseology",My 2018 G90 has an acceleration problems whic...,3,"Safety Concerns, Vehicle Performance, Customer...","[32, 11, 39]"
5,06/02/18,Randy Terry,2018 Genesis G90 Sedan Ultimate 4dr Sedan (5.0...,Nearest Thing To A Cruise Ship?,I found the conservative exterior design to b...,4,"Exterior Design, Interior Features & Comfort, ...","[4, 5, 24]"
6,01/24/18,Robert Mojica,2018 Genesis G90 Sedan Ultimate 4dr Sedan AWD ...,2018 G-90 3.3T & 5.0 AWD,For the amount of money that is being asked f...,3,"Pricing Concerns, Brand Perception, Depreciati...","[16, 26, 14, 4, 29]"
7,05/24/18,Charlie Brown,2017 Genesis G80 Sedan 5.0 Ultimate 4dr Sedan ...,Electronic Warefare,The electronics have some gremlins at work al...,4,"Electronic Reliability, System Integration, Us...","[38, 17, 28]"
8,02/06/18,Allen C,2017 Genesis G80 Sedan 5.0 Ultimate 4dr Sedan ...,"Watch what is covered by the warranty. GOT ya""s","dealer service every 3000 miles,change oil,ch...",4,"Maintenance Cost, Warranty Support, Customer S...","[35, 36, 39]"
9,05/16/17,Chris Lopez,2017 Genesis G80 Sedan 3.8 4dr Sedan AWD (3.8L...,Almost there!,A Lexus ES350 similarly equipped will cost le...,4,"Dealer Concerns, Material Quality, Resale Valu...","[18, 1, 34, 33]"


In [32]:
# Use explode to split each list in 'Encoded' into separate rows
df_expanded = df.explode('Reason_Code').reset_index(drop=True)
df_expanded.head(5)

Unnamed: 0,Date,Author_Name,Vehicle_Title,Review_Title,Review,Rating,Reason_complaint,Reason_Code
0,09/17/17,tim mcc,2017 Genesis G90 Sedan Premium 4dr Sedan AWD (...,dealership/repairs,my g90 has less than 200 miles in shop for tw...,1,"Service Quality, Vehicle Reliability",27
1,09/17/17,tim mcc,2017 Genesis G90 Sedan Premium 4dr Sedan AWD (...,dealership/repairs,my g90 has less than 200 miles in shop for tw...,1,"Service Quality, Vehicle Reliability",10
2,08/21/17,Richard N,2017 Genesis G90 Sedan Ultimate 4dr Sedan (5.0...,My car is getting lots of respect from Mercede...,"Somehow, the G90 looks perfect to my country ...",4,Car features,25
3,08/03/17,Rick,2017 Genesis G90 Sedan Premium 4dr Sedan AWD (...,Traded my Lexus LS for a Genesis G90!,I've owned Lexus cars exclusively for the pas...,1,"Brand Loyalty Shift, Interior Features & Comfo...",12
4,08/03/17,Rick,2017 Genesis G90 Sedan Premium 4dr Sedan AWD (...,Traded my Lexus LS for a Genesis G90!,I've owned Lexus cars exclusively for the pas...,1,"Brand Loyalty Shift, Interior Features & Comfo...",5


In [33]:
# Drop out the column "Reason_complaint"
df_expanded = df_expanded.drop(columns=['Reason_complaint'])
df_expanded

Unnamed: 0,Date,Author_Name,Vehicle_Title,Review_Title,Review,Rating,Reason_Code
0,09/17/17,tim mcc,2017 Genesis G90 Sedan Premium 4dr Sedan AWD (...,dealership/repairs,my g90 has less than 200 miles in shop for tw...,1,27
1,09/17/17,tim mcc,2017 Genesis G90 Sedan Premium 4dr Sedan AWD (...,dealership/repairs,my g90 has less than 200 miles in shop for tw...,1,10
2,08/21/17,Richard N,2017 Genesis G90 Sedan Ultimate 4dr Sedan (5.0...,My car is getting lots of respect from Mercede...,"Somehow, the G90 looks perfect to my country ...",4,25
3,08/03/17,Rick,2017 Genesis G90 Sedan Premium 4dr Sedan AWD (...,Traded my Lexus LS for a Genesis G90!,I've owned Lexus cars exclusively for the pas...,1,12
4,08/03/17,Rick,2017 Genesis G90 Sedan Premium 4dr Sedan AWD (...,Traded my Lexus LS for a Genesis G90!,I've owned Lexus cars exclusively for the pas...,1,5
5,08/03/17,Rick,2017 Genesis G90 Sedan Premium 4dr Sedan AWD (...,Traded my Lexus LS for a Genesis G90!,I've owned Lexus cars exclusively for the pas...,1,39
6,08/03/17,Rick,2017 Genesis G90 Sedan Premium 4dr Sedan AWD (...,Traded my Lexus LS for a Genesis G90!,I've owned Lexus cars exclusively for the pas...,1,19
7,08/04/18,tim m,2018 Genesis G90 Sedan Ultimate 4dr Sedan AWD ...,avoid like the plague,bought first one owned for 10 weeks in the sh...,1,10
8,08/04/18,tim m,2018 Genesis G90 Sedan Ultimate 4dr Sedan AWD ...,avoid like the plague,bought first one owned for 10 weeks in the sh...,1,27
9,07/02/18,Joe Margucci,2018 Genesis G90 Sedan Premium 4dr Sedan (3.3L...,"""Acceleration failure"" - Genesis phraseology",My 2018 G90 has an acceleration problems whic...,3,32


In [26]:
df_expanded.to_csv('Processed_data.csv', index=False)