# In [None]:
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import MinMaxScaler

def get_user_input_for_categories(categories):
    """Interactively collect a 1-5 importance rating for each category.

    The user is prompted on standard input once per category and re-prompted
    for the same category until a whole number between 1 and 5 is entered.

    Args:
        categories: Iterable of category names to ask about.

    Returns:
        dict mapping each category name to the integer rating entered (1-5).
    """
    invalid_message = "Invalid input. Rating must be between 1 and 5. Please try again."
    user_scores = {}
    print("Please rate the importance of each category on a scale from 1 to 5.")
    for category in categories:
        while True:  # Keep asking until a valid input is provided
            raw_value = input(f"Enter your rating for {category} (1 to 5): ")
            try:
                score = int(raw_value)
            except ValueError:
                # Non-numeric text: show the same friendly message as for an
                # out-of-range number rather than int()'s internal error text.
                print(invalid_message)
                continue
            if 1 <= score <= 5:
                user_scores[category] = score
                break  # Valid rating recorded; move on to the next category
            print(invalid_message)
    return user_scores


def apply_predefined_weights(input_scores):
    """Convert 1-5 ratings into normalized category weights.

    Each rating is looked up in a fixed table of raw weights, and the raw
    weights are then divided by their total so the returned weights sum to 1.

    Args:
        input_scores: dict mapping category name to an integer rating 1-5.

    Returns:
        dict mapping category name to its normalized weight. An empty input
        yields an empty dict.

    Raises:
        KeyError: if a rating is not one of 1, 2, 3, 4, 5.
    """
    # Raw weights follow rating / 15 (truncated), so they grow with the rating.
    # Rating 1 was previously 0.66, which gave the lowest rating the largest
    # weight; 0.066 restores the increasing order.
    weight_mapping = {1: 0.066, 2: 0.13, 3: 0.2, 4: 0.266, 5: 0.33}
    weighted_scores = {category: weight_mapping[score] for category, score in input_scores.items()}
    total_weight = sum(weighted_scores.values())
    normalized_weights = {category: weight / total_weight for category, weight in weighted_scores.items()}
    return normalized_weights

def calculate_ahp_scores(df, category_weights):
    """Add an 'AHP Score' column holding the weighted sum of category columns.

    The frame is modified in place and also returned. The score starts at 0
    and accumulates ``df[column] * weight`` for every entry in
    ``category_weights``.

    Args:
        df: DataFrame containing one column per key of ``category_weights``.
        category_weights: dict mapping column name to its numeric weight.

    Returns:
        The same DataFrame object, now carrying the 'AHP Score' column.
    """
    score_column = 'AHP Score'
    df[score_column] = 0
    for column_name in category_weights:
        contribution = df[column_name] * category_weights[column_name]
        df[score_column] = df[score_column] + contribution
    return df

def main(csv_path=r'E:\Concordia University\Term 4\Capestone\Test code\data cleaning\7k data\Cleaned_Data.csv'):
    """Run the interactive vendor-ranking workflow and print the results.

    Steps:
      1. Load the CSV at ``csv_path``.
      2. Ask the user to rate each scoring category from 1 to 5 and turn the
         ratings into normalized weights.
      3. Compute a weighted 'AHP Score' per row and print the title of the
         highest-scoring row.
      4. Fit a linear regression from the min-max scaled category columns to
         the AHP score and print the five rows with the highest predictions.

    Args:
        csv_path: Location of the input CSV. It must contain a 'title' column
            and the five category columns listed below. Defaults to the
            original author's local file so calling ``main()`` with no
            arguments behaves as before.
    """
    df = pd.read_csv(csv_path)
    categories = ["rating", "acidity_structure", "aftertaste", "aroma", "flavor"]
    user_input_scores = get_user_input_for_categories(categories)
    category_weights = apply_predefined_weights(user_input_scores)
    # Work on a copy so the score columns are not added to the loaded frame.
    df_filtered = df[['title'] + categories].copy()
    df_ahp = calculate_ahp_scores(df_filtered, category_weights)

    # AHP Best Vendor
    best_vendor_ahp = df_ahp.loc[df_ahp['AHP Score'].idxmax(), 'title']
    print("\nBest Vendor Name based on AHP:", best_vendor_ahp)

    # Linear Regression Predictions
    # NOTE(review): the target is itself a weighted sum of these same
    # features, so the predictions are expected to track the AHP score
    # closely -- confirm this is the intended comparison.
    scaler = MinMaxScaler()
    features_scaled = scaler.fit_transform(df[categories])
    model = LinearRegression()
    model.fit(features_scaled, df_ahp['AHP Score'])
    df_ahp['Predicted AHP Score'] = model.predict(features_scaled)
    top_5_vendors_lr = df_ahp.sort_values(by='Predicted AHP Score', ascending=False).head(5)

    print("\nTop 5 Vendors based on Linear Regression Predicted AHP:")
    for _, row in top_5_vendors_lr.iterrows():
        print(f"{row['title']}: Predicted AHP Score = {row['Predicted AHP Score']}")

# Run the interactive workflow only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
