In [None]:
import json
import warnings

import numpy as np
import pandas as pd
import torch
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler
from transformers import AutoTokenizer, AutoModelForCausalLM
# Silence every warning category for the rest of the session.
warnings.filterwarnings('ignore')

# --- PREVENT TRUNCATION ---
# Remove pandas' row/column display caps and widen the console so that
# printed DataFrames are shown in full instead of being elided.
pd.options.display.max_rows = None
pd.options.display.max_columns = None
pd.options.display.width = 1000

class SkillDev:
    """Prompt-driven welfare targeting over a survey CSV.

    The class loads a survey file, optionally loads a Llama chat model, and
    answers free-text distribution prompts in three steps:

    1. map the prompt to an intent (``women_sewing``, ``farmers`` or
       ``general``) using the model and/or keyword rules,
    2. filter the survey to the matching rows and cluster them with K-Means,
    3. print the cluster whose mean ``Q45_A_1`` (reported as
       "Avg Income (LKR)") is lowest, together with an explanation.

    Everything is reported via ``print``; ``run_scenario`` returns ``None``.
    """

    def __init__(self, file_path, use_llama=True, model_name="meta-llama/Llama-3.2-1B-Instruct"):
        """Load the survey CSV and, when requested, the Llama chat model.

        Args:
            file_path: Path of the CSV file handed to ``pandas.read_csv``.
            use_llama: Try to load the language model. If loading fails the
                instance falls back to rule-based behaviour and
                ``self.use_llama`` is reset to ``False``.
            model_name: Hugging Face model identifier to load.
        """
        print(f"--- 🚀 Loading Dataset: {file_path} ---")
        self.df = pd.read_csv(file_path)
        self.scaler = StandardScaler()
        self.use_llama = use_llama

        # Define the model attributes up front so the instance is in a
        # consistent state whether or not the model ends up being loaded.
        self.tokenizer = None
        self.llama_model = None
        self.device = "cpu"

        if self.use_llama:
            print(f"--- 🤖 Loading LLAMA Model: {model_name} ---")
            try:
                has_cuda = torch.cuda.is_available()
                self.tokenizer = AutoTokenizer.from_pretrained(model_name)
                self.llama_model = AutoModelForCausalLM.from_pretrained(
                    model_name,
                    # Half precision only on GPU.
                    torch_dtype=torch.float16 if has_cuda else torch.float32,
                    device_map="auto" if has_cuda else None,
                    low_cpu_mem_usage=True,
                )
                self.device = "cuda" if has_cuda else "cpu"
                print(f"✅ LLAMA Model loaded successfully on {self.device}")
            except Exception as e:
                # Deliberately broad: a failed model load (e.g. a gated repo
                # without credentials) must not stop the rule-based pipeline.
                print(f"⚠️ Could not load LLAMA model: {e}")
                print("Continuing without LLAMA integration...")
                self.use_llama = False
                self.tokenizer = None
                self.llama_model = None

        self._prepare_data()

    def _prepare_data(self):
        """Coerce the survey columns used here to numbers and set ``self.features``.

        Non-numeric and missing entries become ``0``. Columns absent from
        the file are skipped here; they are reported when first needed.
        """
        # NOTE(review): because missing values become 0, a row with no
        # recorded Q45_A_1 counts as zero income and pulls its cluster's
        # mean down -- confirm this is the intended treatment.
        numeric_columns = ['AGE', 'EDU', 'Q8', 'Q20', 'Q45_A_1', 'SEX']
        for col in numeric_columns:
            if col in self.df.columns:
                self.df[col] = pd.to_numeric(self.df[col], errors='coerce').fillna(0)
        # Columns fed to the scaler and K-Means.
        self.features = ['AGE', 'EDU', 'Q20', 'Q45_A_1']

    def _require_columns(self, columns):
        """Raise ``KeyError`` naming any of ``columns`` missing from the dataset."""
        missing = [col for col in columns if col not in self.df.columns]
        if missing:
            raise KeyError(f"Dataset is missing required column(s): {', '.join(missing)}")

    def _llama_chat(self, messages, max_new_tokens, temperature):
        """Send one chat exchange to the loaded model and return the reply text.

        Args:
            messages: Chat messages (``role``/``content`` dicts) passed to the
                tokenizer's chat template.
            max_new_tokens: Upper bound on generated tokens.
            temperature: Sampling temperature (sampling is always enabled).
        """
        inputs = self.tokenizer.apply_chat_template(messages, return_tensors="pt", add_generation_prompt=True)
        if self.device == "cuda":
            inputs = inputs.to(self.device)

        outputs = self.llama_model.generate(
            inputs,
            max_new_tokens=max_new_tokens,
            temperature=temperature,
            do_sample=True,
            pad_token_id=self.tokenizer.eos_token_id,
        )
        # The generated sequence starts with the prompt tokens; decode only
        # what follows them.
        return self.tokenizer.decode(outputs[0][inputs.shape[1]:], skip_special_tokens=True)

    @staticmethod
    def _parse_intent(response, prompt):
        """Turn a model reply plus the original prompt into an intent label.

        The JSON object requested from the model is tried first: if its
        ``target_group`` names a specific group, that answer is used. When
        the reply has no usable JSON, or the model answered ``general``,
        keyword matching on the reply and the prompt decides instead.

        Args:
            response: Raw text produced by the model ("" for none).
            prompt: The user's original request.

        Returns:
            ``"women_sewing"``, ``"farmers"`` or ``"general"``.
        """
        start, end = response.find("{"), response.rfind("}")
        if 0 <= start < end:
            try:
                payload = json.loads(response[start:end + 1])
            except ValueError:  # json.JSONDecodeError is a ValueError
                payload = None
            if isinstance(payload, dict):
                group = str(payload.get("target_group", "")).strip().lower()
                if group in ("women_sewing", "farmers"):
                    return group

        reply = response.lower()
        request = prompt.lower()
        if "women_sewing" in reply or "sewing" in request:
            return "women_sewing"
        if "farmer" in reply or "farmer" in request or "tractor" in request:
            return "farmers"
        return "general"

    def _llama_understand_intent(self, prompt):
        """Use LLAMA to understand user intent and return an intent label.

        Falls back to keyword rules when the model is disabled or the
        generation call fails.
        """
        if not self.use_llama:
            return self._rule_based_intent(prompt)

        system_prompt = """You are an AI assistant for a welfare distribution system. 
Analyze the user's request and determine:
1. Target Group: 'women_sewing', 'farmers', or 'general'
2. Resource Type: what is being distributed
3. Number of items: if mentioned

Respond in JSON format: {"target_group": "...", "resource": "...", "quantity": number or null}"""

        messages = [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": prompt}
        ]

        try:
            response = self._llama_chat(messages, max_new_tokens=150, temperature=0.3)
        except Exception as e:
            # Best effort: any model failure degrades to the keyword rules.
            print(f"⚠️ LLAMA intent analysis failed: {e}")
            return self._rule_based_intent(prompt)

        print(f"🤖 LLAMA Intent Analysis: {response}")
        return self._parse_intent(response, prompt)

    def _rule_based_intent(self, prompt):
        """Fallback rule-based intent detection (keywords in the prompt only)."""
        # With an empty model reply only the prompt keywords can match.
        return self._parse_intent("", prompt)

    @staticmethod
    def _fallback_explanation(target_cluster, eligible_count):
        """Return the fixed explanation used when no model text is available."""
        return f"Selected Cluster {target_cluster} with {eligible_count} recipients (Lowest Income Group)."

    def _llama_generate_explanation(self, cluster_summary, target_cluster, eligible_count, intent):
        """Use LLAMA to generate natural language explanation of AI decision.

        Args:
            cluster_summary: Per-cluster summary DataFrame shown to the model.
            target_cluster: Identifier of the selected cluster.
            eligible_count: Number of rows in the selected cluster.
            intent: Intent label driving the distribution.

        Returns:
            The model's explanation, or a fixed one-line summary when the
            model is disabled or generation fails.
        """
        if not self.use_llama:
            return self._fallback_explanation(target_cluster, eligible_count)

        summary_text = cluster_summary.to_string()

        prompt = f"""Based on this welfare distribution analysis:

Cluster Statistics:
{summary_text}

Selected Cluster: {target_cluster}
Number of Eligible Recipients: {eligible_count}
Distribution Type: {intent}

Provide a clear, empathetic explanation (2-3 sentences) of why this cluster was selected and what it means for the beneficiaries."""

        messages = [
            {"role": "system", "content": "You are a compassionate AI welfare officer explaining distribution decisions."},
            {"role": "user", "content": prompt}
        ]

        try:
            return self._llama_chat(messages, max_new_tokens=200, temperature=0.7).strip()
        except Exception as e:
            print(f"⚠️ LLAMA explanation generation failed: {e}")
            return self._fallback_explanation(target_cluster, eligible_count)

    def _select_target_group(self, intent):
        """Return ``(rows, label)`` for the survey subset matching ``intent``."""
        # NOTE(review): presumably SEX == 2 denotes female and Q8 holds an
        # occupation code whose leading digit identifies the sector --
        # confirm against the survey codebook.
        if intent == "women_sewing":
            self._require_columns(['SEX', 'Q8'])
            q8_text = self.df['Q8'].astype(str)
            mask = (self.df['SEX'] == 2) & q8_text.str.startswith(('7', '9'))
            return self.df[mask].copy(), "Vulnerable Women for Sewing Machines"
        if intent == "farmers":
            self._require_columns(['Q8'])
            mask = self.df['Q8'].astype(str).str.startswith('6')
            return self.df[mask].copy(), "Agricultural Sector for Tractors/Fertilizer"
        return self.df.copy(), "General Population"

    @staticmethod
    def _effective_cluster_count(n_clusters, n_rows):
        """Cap the requested cluster count at the number of available rows."""
        # K-Means cannot form more clusters than there are samples.
        return min(n_clusters, n_rows)

    def run_scenario(self, prompt, n_clusters=3):
        """Answer one distribution prompt and print the resulting eligibility list.

        Args:
            prompt: Free-text request, e.g. a description of the resource to
                distribute.
            n_clusters: Desired number of K-Means clusters. Reduced
                automatically when fewer matching rows exist.

        Raises:
            KeyError: If the dataset lacks a column the scenario needs.
        """
        print(f"\n{'='*70}")
        print(f"💬 PROMPT: \"{prompt}\"")

        # 1. Intent understanding (model-assisted when available)
        intent = self._llama_understand_intent(prompt)
        target_group, label = self._select_target_group(intent)

        if target_group.empty:
            print("⚠️ No matching records found.")
            return

        # 2. AI Clustering
        self._require_columns(self.features)
        cluster_count = self._effective_cluster_count(n_clusters, len(target_group))
        if cluster_count != n_clusters:
            print(f"⚠️ Only {len(target_group)} matching records; "
                  f"using {cluster_count} cluster(s) instead of {n_clusters}.")
        X = self.scaler.fit_transform(target_group[self.features])
        kmeans = KMeans(n_clusters=cluster_count, n_init=10, random_state=42)
        target_group['cluster_id'] = kmeans.fit_predict(X)

        # 3. Cluster Summary
        grouped = target_group.groupby('cluster_id')
        summary = grouped[self.features].mean()
        summary['Size'] = grouped.size()
        print(f"\n--- 📊 CLUSTER PROFILES ({label}) ---")
        print(summary.rename(columns={'Q45_A_1': 'Avg Income (LKR)', 'Q20': 'Weekly Hours'}).round(2))

        # 4. Identify Target Cluster: the one with the lowest mean Q45_A_1
        neediest_id = summary['Q45_A_1'].idxmin()
        all_eligible = target_group[target_group['cluster_id'] == neediest_id].copy()

        # 5. Explanation (model-generated when available)
        explanation = self._llama_generate_explanation(summary, neediest_id, len(all_eligible), intent)
        # Label the decision with the mode actually in effect.
        mode = "LLAMA-Enhanced" if self.use_llama else "Rule-Based"
        print(f"\n📌 AI DECISION ({mode}):")
        print(f"{explanation}")
        print(f"\n📢 FULL ELIGIBILITY LIST: Found {len(all_eligible)} recipients.")
        print("-" * 70)

        print(all_eligible[['AGE', 'SEX', 'Q8', 'Q20', 'Q45_A_1']])
        print("-" * 70)

# --- EXECUTION ---
# Initialize with the Llama model enabled (SkillDev defaults to
# meta-llama/Llama-3.2-1B-Instruct).
# Note: You may need to login to HuggingFace and accept LLAMA model terms
# Run: huggingface-cli login
system = SkillDev('LFS-2023.csv', use_llama=True)

# Interactive session: one scenario per entered prompt. A blank line,
# "exit"/"quit", Ctrl-C or end-of-input ends the session cleanly instead of
# looping forever or finishing with a traceback.
while True:
    try:
        user_prompt = input("Enter prompt (blank or 'exit' to quit): ").strip()
    except (EOFError, KeyboardInterrupt):
        print()
        break
    if user_prompt.lower() in ("", "exit", "quit"):
        break
    system.run_scenario(user_prompt)


--- 🚀 Loading Dataset: LFS-2023.csv ---
--- 🤖 Loading LLAMA Model: meta-llama/Llama-3.2-1B-Instruct ---
⚠️ Could not load LLAMA model: You are trying to access a gated repo.
Make sure to have access to it at https://huggingface.co/meta-llama/Llama-3.2-1B-Instruct.
401 Client Error. (Request ID: Root=1-69800476-682281972aa43f7c1cc35c7d;1e2f944b-5fea-4f65-96bc-e633ae8639d3)

Cannot access gated repo for url https://huggingface.co/meta-llama/Llama-3.2-1B-Instruct/resolve/main/config.json.
Access to model meta-llama/Llama-3.2-1B-Instruct is restricted. You must have access to it and be authenticated to access it. Please log in.
Continuing without LLAMA integration...


## Alternative: Run Without LLAMA (if model loading fails)

If you encounter issues loading LLAMA or want to run without it:

In [None]:
# Rule-based mode: no language model is loaded, so intent detection relies
# on keyword matching and the explanation is the fixed one-line summary.
system_basic = SkillDev(file_path='LFS-2023.csv', use_llama=False)
system_basic.run_scenario(
    prompt="I have 100 sewing machines find suitable people for that"
)