In [None]:
#!pip install -U scikit-learn imbalanced-learn xgboost pandas numpy requests



In [None]:
import getpass
import os
import re

import joblib
import numpy as np
import pandas as pd
import requests
import xgboost as xgb
from imblearn.over_sampling import SMOTE
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import train_test_split


In [None]:
# Load the distributor records from the CSV file in the working directory.
df = pd.read_csv(filepath_or_buffer="distributor_dataset.csv")

def compute_loyalty(df):
    """Return a copy of ``df`` with a 0-100 ``Loyalty_Score`` column added.

    The raw score is a weighted sum: sales quantity, reward points and
    inventory level add to it; churn probability and the absolute gap between
    base price and competitor price subtract from it. The raw score is then
    min-max scaled over the rows of ``df`` so the lowest row maps to 0 and the
    highest to 100.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns ``Sales_Quantity``, ``Reward_Points``,
        ``Inventory_Level``, ``Churn_Probability``, ``Base_Price`` and
        ``Competitor_Price``.

    Returns
    -------
    pandas.DataFrame
        A copy of ``df`` with ``Loyalty_Score`` added (or overwritten). The
        input frame is not modified.

    Notes
    -----
    When every row has the same raw score (for example a single-row frame) the
    min-max range is zero. Instead of dividing 0 by 0 and returning NaN for
    every row, those rows are scored 0.0. Rows whose raw score is NaN because
    of missing inputs still come out as NaN.
    """
    scored = df.copy()
    raw_score = (
        scored["Sales_Quantity"] * 0.3
        + scored["Reward_Points"] * 0.002
        + scored["Inventory_Level"] * 0.1
        - scored["Churn_Probability"] * 50
        - (scored["Base_Price"] - scored["Competitor_Price"]).abs() * 0.05
    )

    min_score, max_score = raw_score.min(), raw_score.max()
    score_range = max_score - min_score
    if score_range == 0:
        # Constant scores: divide by 1 so (raw - min) / range is 0, not 0/0.
        score_range = 1.0

    scored["Loyalty_Score"] = ((raw_score - min_score) / score_range) * 100
    return scored

# Attach the loyalty score, then derive the binary churn target:
# 1 when Churn_Probability is strictly above 0.5, otherwise 0.
df = compute_loyalty(df)
df["Churn_Label"] = df["Churn_Probability"].gt(0.5).astype(int)


In [None]:
# Model inputs. Loyalty_Score is deliberately NOT included: compute_loyalty()
# builds it from Churn_Probability, the same column Churn_Label is thresholded
# from, so using it as a feature would leak the target into the model.
features = ["Sales_Quantity", "Inventory_Level", "Base_Price",
            "Competitor_Price", "Reward_Points"]

X, y = df[features], df["Churn_Label"]
X_train, X_test, y_train, y_test = train_test_split(
    X, y, stratify=y, test_size=0.2, random_state=42
)

# Oversample the training split only; the held-out split keeps its original
# class balance so the evaluation below is not run on synthetic rows.
X_train_res, y_train_res = SMOTE(random_state=42).fit_resample(X_train, y_train)

# use_label_encoder was dropped: xgboost reports it as an unused parameter.
xgb_clf = xgb.XGBClassifier(eval_metric="auc", random_state=42)
xgb_clf.fit(X_train_res, y_train_res)

# Score the model on the held-out split, which was previously created but unused.
test_auc = roc_auc_score(y_test, xgb_clf.predict_proba(X_test)[:, 1])
print(f"Held-out ROC AUC: {test_auc:.3f}")

joblib.dump(xgb_clf, "best_churn_model.joblib")


Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


['best_churn_model.joblib']

In [None]:
def generate_data_context(df, limit=20):
    """Build the plain-text data context that is sent to the LLM.

    The text has three parts: the five most frequent ``Location_ID`` values
    with their row counts, the ``describe()`` table of the numeric columns
    rounded to two decimals, and one line per sampled record.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``Distributor_ID``, ``Location_ID``, ``Sales_Quantity``,
        ``Inventory_Level``, ``Base_Price``, ``Competitor_Price``,
        ``Reward_Points``, ``Churn_Probability`` and ``Loyalty_Score``.
    limit : int, default 20
        Maximum number of records to sample; capped at ``len(df)``.

    Returns
    -------
    str
        The formatted context. Sampling uses a fixed seed (42), so the same
        frame always produces the same text.
    """
    stats_table = df.describe(include='number').round(2).to_string()

    top_locations = df['Location_ID'].value_counts().head(5).to_dict()
    location_summary = ", ".join(
        f"{loc}: {count} distributors" for loc, count in top_locations.items()
    )

    sample_size = min(limit, len(df))
    records = df.sample(n=sample_size, random_state=42).to_dict("records")
    record_lines = [
        f"{rec['Distributor_ID']} at {rec['Location_ID']} → Sales: {rec['Sales_Quantity']}, "
        f"Inventory: {rec['Inventory_Level']}, Base ₹{rec['Base_Price']}, Competitor ₹{rec['Competitor_Price']}, "
        f"Reward: {rec['Reward_Points']}, Churn: {rec['Churn_Probability']:.2f}, Loyalty: {rec['Loyalty_Score']:.2f}"
        for rec in records
    ]

    preamble = f"""[DATA SUMMARY]
Top Locations: {location_summary}
Numeric Summary:\n{stats_table}

[Sample Records]
"""
    return preamble + "\n".join(record_lines)


In [None]:
# Groq API configuration.
# The key is read from the GROQ_API_KEY environment variable so it is never
# written into the notebook. When the variable is not set, the key is requested
# through a non-echoing prompt and stored in the environment for this process.
# Keys are issued at https://console.groq.com/keys
GROQ_API_KEY = os.getenv("GROQ_API_KEY")
if not GROQ_API_KEY:
    GROQ_API_KEY = getpass.getpass("Enter your Groq API key: ").strip()
    if not GROQ_API_KEY:
        # Fail here rather than sending an empty bearer token with every request.
        raise ValueError("GROQ_API_KEY is empty; set the environment variable or enter a key.")
    os.environ["GROQ_API_KEY"] = GROQ_API_KEY

# Request headers and endpoint used for every chat-completion call.
headers = {
    "Authorization": f"Bearer {GROQ_API_KEY}",
    "Content-Type": "application/json"
}
groq_url = "https://api.groq.com/openai/v1/chat/completions"


In [None]:
def ask_groq_llama(question, context_text, model="llama3-8b-8192", temperature=0.5, timeout=30):
    """Send a question plus data context to the Groq chat-completions endpoint.

    Parameters
    ----------
    question : str
        The user's question.
    context_text : str
        Data context placed ahead of the question in the user message.
    model : str, default "llama3-8b-8192"
        Model identifier sent in the request payload.
    temperature : float, default 0.5
        Sampling temperature sent in the request payload.
    timeout : float, default 30
        Seconds passed to ``requests.post`` as its timeout, so a stalled
        connection cannot block the caller indefinitely.

    Returns
    -------
    str
        The content of the first returned choice. On failure, a string that
        starts with ``"LLM Error: "`` is returned instead of raising, so an
        interactive caller can keep running.
    """
    payload = {
        "model": model,
        "messages": [
            {"role": "system", "content": "You are an intelligent assistant that answers any question about a distributor dataset. Use the data context provided to reason, summarize, and explain clearly."},
            {"role": "user", "content": f"{context_text}\n\nQuestion: {question}"}
        ],
        "temperature": temperature
    }
    try:
        res = requests.post(groq_url, headers=headers, json=payload, timeout=timeout)
        res.raise_for_status()
        return res.json()["choices"][0]["message"]["content"]
    except requests.RequestException as e:
        # Transport-level problems: connection errors, timeouts, HTTP error status.
        return f"LLM Error: {e}"
    except (KeyError, IndexError, TypeError, ValueError) as e:
        # The call succeeded but the body does not have the expected shape.
        return f"LLM Error: unexpected response format ({e!r})"


In [None]:
print("🤖 Loyalty Testing Bot (LLM-Driven) Ready! Ask anything. Type 'exit' to quit.\n")
model = joblib.load("best_churn_model.joblib")

# df is not modified inside the loop and generate_data_context samples with a
# fixed seed, so the context is identical for every question: build it once.
context_text = generate_data_context(df)

# Finds a distributor ID anywhere in the question, ignoring surrounding
# punctuation such as "DIST_8?" or "(DIST_8)".
dist_id_pattern = re.compile(r"\bDIST_\w+")

while True:
    try:
        q = input("You: ").strip()
    except (EOFError, KeyboardInterrupt):
        # Closed input stream or an interrupted kernel ends the chat cleanly.
        print("\nBot: See you next time! 👋")
        break

    if not q:
        continue
    if q.lower() in ("exit", "quit"):
        print("Bot: See you next time! 👋")
        break

    # Answer directly from the churn model when the question mentions "churn"
    # and names a distributor that exists in df; otherwise fall through to the LLM.
    if "churn" in q.lower():
        id_match = dist_id_pattern.search(q)
        if id_match:
            dist_id = id_match.group(0)
            rec = df[df["Distributor_ID"] == dist_id]
            if not rec.empty:
                probs = model.predict_proba(rec[features])[:, 1]
                if len(rec) == 1:
                    print(f"Bot: Predicted churn probability for {dist_id} is {probs[0]:.2f}")
                else:
                    # The ID matched several rows: report each one with its
                    # location instead of silently using only the first.
                    for loc, prob in zip(rec["Location_ID"], probs):
                        print(f"Bot: Predicted churn probability for {dist_id} at {loc} is {prob:.2f}")
                continue

    print("Bot:", ask_groq_llama(q, context_text))
    print("-" * 50)


🤖 Loyalty Testing Bot (LLM-Driven) Ready! Ask anything. Type 'exit' to quit.

You: top loyal distributors
Bot: Based on the provided data, I can help you identify the top loyal distributors. To do this, I'll sort the data by the "Loyalty_Score" column in descending order (highest to lowest) and then select the top n distributors.

Since there's no specific value for "n" provided, I'll assume you want to see the top 5 loyal distributors. Here are the results:

**Top 5 Loyal Distributors:**

1. DIST_8 at LOC_36 → Loyalty Score: 78.38
2. DIST_162 at LOC_8 → Loyalty Score: 61.52
3. DIST_8 at LOC_56 → Loyalty Score: 67.32
4. DIST_87 at LOC_94 → Loyalty Score: 61.80
5. DIST_337 at LOC_33 → Loyalty Score: 64.14

These distributors have the highest loyalty scores, indicating that they have consistently demonstrated high levels of loyalty to the brand.
--------------------------------------------------
You: exit
Bot: See you next time! 👋
