In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline

# Load and prepare the dataset
def load_and_preprocess(file_path):
    """Read the CSV dataset and separate the features from the target.

    Args:
        file_path: Path of the CSV file to read; it must contain a
            "Revenue" column.

    Returns:
        A ``(x, y)`` tuple where ``x`` holds every column except "Revenue"
        and ``y`` is the "Revenue" column.
    """
    target = "Revenue"
    frame = pd.read_csv(file_path)
    features = frame.drop(columns=[target])
    return features, frame[target]

# Train the linear regression model
def train_model(x, y):
    """Fit a preprocessing + linear-regression pipeline on the given data.

    The numeric columns are standardized and the "Region" column is one-hot
    encoded before being passed to a ``LinearRegression``.

    Args:
        x: Feature table containing the numeric columns listed in
            ``numeric_features`` below and a "Region" column.
        y: Target values aligned with the rows of ``x``.

    Returns:
        The fitted ``Pipeline``.
    """
    numeric_features = ["Marketing_Spend", "R&D_Spend", "Administration_Costs", "Number_of_Employees"]
    categorical_features = ["Region"]

    preprocessor = ColumnTransformer(transformers=[
        ("num", StandardScaler(), numeric_features),
        # A region that was not present when fitting (for example a typo
        # entered at prediction time) is encoded as all zeros instead of
        # making predict() raise.
        ("cat", OneHotEncoder(handle_unknown="ignore"), categorical_features),
    ])

    pipeline = Pipeline(steps=[
        ("preprocessor", preprocessor),
        ("regressor", LinearRegression()),
    ])

    pipeline.fit(x, y)
    return pipeline

# Evaluate the model's performance
def evaluate_model(model, x, y):
    """Print the MAE, RMSE and R² of ``model`` on the given samples.

    Args:
        model: Fitted estimator exposing ``predict``.
        x: Features passed to ``model.predict``.
        y: True target values the predictions are compared against.
    """
    y_pred = model.predict(x)
    mse = mean_squared_error(y, y_pred)
    rmse = np.sqrt(mse)
    mae = mean_absolute_error(y, y_pred)
    r2 = r2_score(y, y_pred)
    print(f"MAE : {mae:.2f}, RMSE : {rmse:.2f}, R¬≤ : {r2:.2f}")

# Predict revenue from user-supplied data
def _ask_number(prompt, cast):
    """Prompt repeatedly until the user types a value that ``cast`` accepts."""
    while True:
        raw = input(prompt)
        try:
            return cast(raw)
        except ValueError:
            # Re-prompt instead of letting a typo abort the whole program.
            print("Valeur invalide, veuillez entrer un nombre.")


def predict_revenue(model):
    """Ask the user for one company's figures and print the predicted revenue.

    Numeric fields are re-requested until they parse; the region is taken as
    typed, with surrounding whitespace removed.

    Args:
        model: Fitted estimator whose ``predict`` accepts a one-row DataFrame
            with the columns built below.
    """
    print("\nEntrez les donn√©es de l'entreprise : ")
    marketing = _ask_number("Marketing_Spend : ", float)
    rmd = _ask_number("R&D_Spend : ", float)
    admin = _ask_number("Administration_Costs : ", float)
    employees = _ask_number("Number_of_Employees : ", int)
    # Stray spaces would otherwise make a valid region look like a new category.
    region = input("R√©gion (Am√©rique du Nord, Europe, Asie) : ").strip()

    user_df = pd.DataFrame([{
        "Marketing_Spend": marketing,
        "R&D_Spend": rmd,
        "Administration_Costs": admin,
        "Number_of_Employees": employees,
        "Region": region,
    }])

    prediction = model.predict(user_df)[0]
    print(f"\nüîÆ Chiffre d'affaires pr√©dit : {prediction:.2f}\n")

# Main program
def main():
    """Train the revenue model, print hold-out metrics and run the menu loop.

    The dataset is read from a fixed relative path. Metrics are computed on a
    20% hold-out split; the model used for interactive predictions is fitted
    on the complete dataset.
    """
    file_path = "../Data/788438_data.csv"
    x, y = load_and_preprocess(file_path)

    # Score on rows the model was not fitted on: metrics computed on the
    # training rows themselves are over-optimistic.
    x_train, x_test, y_train, y_test = train_test_split(
        x, y, test_size=0.2, random_state=42
    )
    evaluate_model(train_model(x_train, y_train), x_test, y_test)

    # Interactive predictions still use a model fitted on every available row.
    model = train_model(x, y)

    while True:
        print("\n--- MENU ---")
        print("1. Pr√©dire les revenus")
        print("2. Quitter")
        choice = input("Choix : ").strip()

        if choice == "1":
            predict_revenue(model)
        elif choice == "2":
            print("‚úÖ Programme termin√©.")
            break
        else:
            print("‚ùå Choix invalide.")


if __name__ == "__main__":
    main()


from sklearn.preprocessing import OneHotEncoder

# Column names are repeated here because the lists defined inside
# train_model are local to that function and not visible at module level.
numeric_features = ["Marketing_Spend", "R&D_Spend", "Administration_Costs", "Number_of_Employees"]
categorical_features = ["Region"]
# Explicit list of accepted regions handed to the encoder.
regions_connues = [["Am√©rique du Nord", "Europe", "Asie"]]

preprocessor = ColumnTransformer(
    transformers=[
        ("num", StandardScaler(), numeric_features),
        # Regions outside regions_connues are encoded as all zeros rather
        # than raising an error.
        ("cat", OneHotEncoder(categories=regions_connues, handle_unknown='ignore'), categorical_features)
    ]
)