In [17]:
from flask import Flask, request, jsonify
from pyngrok import ngrok
import pandas as pd
from transformers import pipeline

# Flask application object; the route decorators in this file register
# their handlers on it.
app = Flask(import_name=__name__)

# Hugging Face summarization pipeline using the facebook/bart-large-cnn
# checkpoint. It is created once here and reused by analyze_data().
summarizer = pipeline(task="summarization", model="facebook/bart-large-cnn")

# Data ingestion function
def ingest_data(file):
    """Parse an uploaded CSV or JSON file and persist it as ``sales_data.json``.

    The parser is chosen from the extension of ``file.filename``. The match
    is case-insensitive, so ``REPORT.CSV`` is handled like ``report.csv``.

    Args:
        file: Uploaded file object exposing a ``filename`` attribute and
            readable by ``pandas.read_csv`` / ``pandas.read_json``.

    Returns:
        The parsed ``DataFrame`` on success, or the tuple
        ``("Unsupported file format", 400)`` when the filename is missing or
        its extension is neither ``.csv`` nor ``.json``.
    """
    # A missing filename (None or "") is treated like an unknown extension
    # instead of raising AttributeError on ``.endswith``.
    filename = (getattr(file, 'filename', None) or '').lower()
    if filename.endswith('.csv'):
        data = pd.read_csv(file)
    elif filename.endswith('.json'):
        data = pd.read_json(file)
    else:
        return "Unsupported file format", 400
    # Overwrite the on-disk copy; each upload replaces the previous dataset.
    data.to_json('sales_data.json', orient='records')
    return data

# Function to format individual data as text
def format_individual_data(data):
    """Render the first row of ``data`` as a labelled, line-per-field report.

    Args:
        data: DataFrame containing every column named in ``fields`` below;
            only the first row of each column is read.

    Returns:
        A string with one ``Label: value`` line per field, each line
        terminated by a newline.
    """
    # (label shown in the report, column it is read from), in output order.
    fields = (
        ("Employee ID", "employee_id"),
        ("Employee Name", "employee_name"),
        ("Date", "dated"),
        ("Leads Taken", "lead_taken"),
        ("Tours Booked", "tours_booked"),
        ("Applications", "applications"),
        ("Tours per Lead", "tours_per_lead"),
        ("Apps per Tour", "apps_per_tour"),
        ("Revenue Confirmed", "revenue_confirmed"),
        ("Revenue Pending", "revenue_pending"),
    )
    lines = [f"{label}: {data[column].values[0]}\n" for label, column in fields]
    return "".join(lines)

# Function to format team data as text
def format_team_data(data):
    """Summarise team-wide totals from ``data`` as a short text report.

    Args:
        data: DataFrame with ``lead_taken``, ``tours_booked``,
            ``revenue_confirmed`` and ``avg_close_rate_30_days`` columns.

    Returns:
        Four newline-terminated ``Label: value`` lines: the column sums for
        leads, tours and confirmed revenue, then the mean close rate.
    """
    # Aggregates are evaluated in the order they appear in the report.
    report_lines = (
        ("Total Leads Taken", data['lead_taken'].sum()),
        ("Total Tours Booked", data['tours_booked'].sum()),
        ("Total Revenue Confirmed", data['revenue_confirmed'].sum()),
        ("Average Close Rate (30 days)", data['avg_close_rate_30_days'].mean()),
    )
    return "".join(f"{label}: {value}\n" for label, value in report_lines)

# Function to analyze data with Hugging Face

def analyze_data(data, query):
    """Ask the module-level ``summarizer`` pipeline for feedback on ``data``.

    Args:
        data: Sales data to embed in the prompt; it is interpolated with an
            f-string, so any object with a useful string form works.
        query: Instruction text inserted after "Based on the following sales
            data, " in the prompt.

    Returns:
        The ``summary_text`` field of the first result returned by the
        pipeline.
    """
    prompt = (
        f"You are an expert sales analyst. Based on the following sales data, {query}. "
        f"Here is the data: {data}. Please provide detailed feedback and actionable insights."
    )
    response = summarizer(
        prompt,
        max_length=150,
        min_length=50,
        do_sample=False,
        clean_up_tokenization_spaces=False,
        # Cut prompts that exceed the model's maximum input length instead of
        # letting the model fail on an over-long sequence.
        truncation=True,
    )
    return response[0]['summary_text']


@app.route('/individual/<name>', methods=['GET'])
def individual_performance(name):
    """Return LLM feedback for the employee whose ``employee_name`` is ``name``.

    Args:
        name: Employee name taken from the URL path; compared for exact
            equality against the ``employee_name`` column.

    Returns:
        ``{"feedback": ...}`` on success, or a JSON ``{"error": ...}`` body
        with status 404 when no sales data is stored or no row matches.
    """
    try:
        data = pd.read_json('sales_data.json')
    except (FileNotFoundError, ValueError):
        # Nothing uploaded yet, or the stored file is unreadable: answer with
        # a JSON error instead of an unhandled exception.
        return jsonify({"error": "No sales data available"}), 404
    if data.empty:
        # An empty dataset has no 'employee_name' column to filter on.
        return jsonify({"error": "No sales data available"}), 404

    individual_data = data[data['employee_name'] == name]  # Exact-match filter
    if individual_data.empty:
        return jsonify({"error": "No data found for the specified employee."}), 404

    # Only the first matching record is sent to the model, as a plain dict.
    formatted_data = individual_data.to_dict(orient='records')[0]
    feedback = analyze_data(formatted_data, "provide feedback on individual performance")

    return jsonify({"feedback": feedback})

# Upload endpoint
@app.route('/upload', methods=['POST'])
def upload_file():
    """Accept a multipart upload under the ``file`` field and ingest it.

    Returns:
        A JSON success message with status 200, or a plain-text error with
        status 400 when the ``file`` part is missing, the format is
        unsupported, or the contents cannot be parsed.
    """
    if 'file' not in request.files:
        return "No file part", 400
    file = request.files['file']
    try:
        data = ingest_data(file)
    except ValueError:
        # pandas signals empty or malformed CSV/JSON with ValueError (or a
        # subclass such as EmptyDataError / ParserError). That is a problem
        # with the upload, so report 400 rather than letting it become a 500.
        return "Could not parse uploaded file", 400
    if isinstance(data, tuple):
        return data  # (message, status) error pair produced by ingest_data
    return jsonify({"message": "Data uploaded successfully"}), 200

# 1. Individual Sales Representative Performance Analysis
@app.route('/api/rep_performance', methods=['GET'])
def rep_performance():
    """Return LLM feedback for the sales representative given by ``rep_id``.

    Query parameters:
        rep_id: Integer employee id, matched against the ``employee_id``
            column.

    Returns:
        ``{"feedback": ...}`` on success; a JSON ``{"error": ...}`` body with
        status 400 when ``rep_id`` is missing or not an integer, or 404 when
        no sales data is stored or no row matches.
    """
    rep_id = request.args.get('rep_id')
    if not rep_id:
        return jsonify({"error": "rep_id parameter is required"}), 400

    # Validate before use: a non-numeric rep_id is a client error, not a crash.
    try:
        rep_number = int(rep_id)
    except ValueError:
        return jsonify({"error": "rep_id must be an integer"}), 400

    try:
        data = pd.read_json('sales_data.json')
    except (FileNotFoundError, ValueError):
        # Nothing uploaded yet, or the stored file is unreadable.
        return jsonify({"error": "No sales data available"}), 404
    if data.empty:
        # An empty dataset has no 'employee_id' column to filter on.
        return jsonify({"error": "No sales data available"}), 404

    individual_data = data[data['employee_id'] == rep_number]

    if individual_data.empty:
        return jsonify({"error": f"No data found for rep_id {rep_id}"}), 404

    formatted_data = format_individual_data(individual_data)

    feedback = analyze_data(
        formatted_data,
        f"Provide a performance analysis for sales representative {rep_id} based on the following data:"
    )
    return jsonify({"feedback": feedback})

# 2. Overall Sales Team Performance Summary

@app.route('/api/team_performance', methods=['GET'])
def team_performance():
    """Return LLM feedback on aggregate team figures from ``sales_data.json``.

    Returns:
        ``{"feedback": ...}`` with status 200, or ``{"error": <message>}``
        with status 500 if any step raises.
    """
    try:
        sales = pd.read_json('sales_data.json')

        # Team-wide aggregates that go into the prompt.
        revenue_sum = sales['revenue_confirmed'].sum()
        tours_sum = sales['tours_booked'].sum()
        close_rate_mean = sales['avg_close_rate_30_days'].mean()

        # Natural-language description of the aggregates, handed to
        # analyze_data as its data argument.
        team_facts = "".join([
            f"The sales team has a total revenue of {revenue_sum}, ",
            f"total tours booked: {tours_sum}, ",
            f"and an average close rate of {close_rate_mean}. ",
            "Provide a summary of the overall team performance.",
        ])

        summary = analyze_data(team_facts, "provide a summary of overall team performance")

        return jsonify({"feedback": summary}), 200
    except Exception as e:
        return jsonify({"error": str(e)}), 500

@app.route('/api/performance_trends', methods=['GET'])
def performance_trends():
    """Sum the numeric columns of the stored sales data per time bucket.

    Query parameters:
        time_period: One of ``yearly``, ``monthly`` or ``quarterly``.

    Returns:
        ``{"feedback": "Trends for <period>: <json>"}`` with status 200;
        404 when the stored dataset is empty, 400 for an unrecognised
        ``time_period``, or 500 with the exception text if anything raises.
    """
    time_period = request.args.get('time_period')

    # Maps each accepted period to a function that derives the grouping key
    # from the parsed 'dated' column; only the selected one is ever called.
    key_builders = {
        'yearly': lambda dates: dates.dt.year,
        'monthly': lambda dates: dates.dt.to_period('M'),
        'quarterly': lambda dates: dates.dt.to_period('Q'),
    }

    try:
        data = pd.read_json('sales_data.json')

        if data.empty:
            return jsonify({"error": "No sales data available"}), 404

        # Unparseable dates become NaT rather than raising.
        data['dated'] = pd.to_datetime(data['dated'], errors='coerce')

        build_key = key_builders.get(time_period)
        if build_key is None:
            return jsonify({"error": "Invalid time_period value"}), 400

        trends = data.groupby(build_key(data['dated'])).sum(numeric_only=True)

        # The aggregated table is embedded as a JSON string in the message.
        feedback = f"Trends for {time_period}: {trends.to_json()}"

        return jsonify({"feedback": feedback}), 200

    except Exception as e:
        return jsonify({"error": str(e)}), 500


def filter_data_by_time_period(data, period):
    """Return ``data`` unchanged; time-period filtering is not implemented yet.

    Args:
        data: Dataset to filter.
        period: Requested time period; currently ignored.

    Returns:
        The same ``data`` object that was passed in.
    """
    return data

# Home route
@app.route('/')
def home():
    """Serve a fixed greeting string at the site root."""
    greeting = "Hello from Flask in Colab!"
    return greeting

# Start the ngrok tunnel and run the Flask app
# Ask ngrok for a tunnel to local port 5000 and print the resulting tunnel
# object so the public address is visible in the cell output.
public_url = ngrok.connect(5000)
print(f"Public URL: {public_url}")

# Start the Flask server on the same port the tunnel targets.
app.run(port=5000)




Public URL: NgrokTunnel: "https://c1c6-104-155-230-138.ngrok-free.app" -> "http://localhost:5000"
 * Serving Flask app '__main__'
 * Debug mode: off


 * Running on http://127.0.0.1:5000
INFO:werkzeug:Press CTRL+C to quit
INFO:werkzeug:127.0.0.1 - - [22/Sep/2024 12:49:26] "HEAD / HTTP/1.1" 200 -
INFO:werkzeug:127.0.0.1 - - [22/Sep/2024 12:49:26] "HEAD / HTTP/1.1" 200 -
INFO:werkzeug:127.0.0.1 - - [22/Sep/2024 12:49:27] "HEAD / HTTP/1.1" 200 -
INFO:werkzeug:127.0.0.1 - - [22/Sep/2024 12:49:38] "POST /upload HTTP/1.1" 200 -
INFO:werkzeug:127.0.0.1 - - [22/Sep/2024 12:49:46] "GET /api/performance_trends?time_period=yearly HTTP/1.1" 200 -
INFO:werkzeug:127.0.0.1 - - [22/Sep/2024 12:50:20] "GET /api/performance_trends?time_period=monthly HTTP/1.1" 200 -
INFO:werkzeug:127.0.0.1 - - [22/Sep/2024 12:54:18] "HEAD / HTTP/1.1" 200 -
INFO:werkzeug:127.0.0.1 - - [22/Sep/2024 12:54:20] "HEAD / HTTP/1.1" 200 -
INFO:werkzeug:127.0.0.1 - - [22/Sep/2024 12:54:28] "POST / HTTP/1.1" 405 -
INFO:werkzeug:127.0.0.1 - - [22/Sep/2024 12:54:39] "HEAD / HTTP/1.1" 200 -
INFO:werkzeug:127.0.0.1 - - [22/Sep/2024 12:56:29] "GET / HTTP/1.1" 20