In [None]:
%pyspark
# Analyze check-in patterns: number of check-in rows per value of "date".
checkin_patterns = (
    checkin_df
    .groupBy("date")
    .count()
)

def generate_business_insights(business_id):
    """
    Generate strategic recommendations for one business using ChatGPT.

    The business's check-ins are counted per "hour", the counts are embedded
    in a prompt, and the model's reply is returned.

    Args:
        business_id: Value matched against ``checkin_df.business_id``.

    Returns:
        str: Text content of the first completion choice.

    Notes:
        Uses the notebook-level ``checkin_df`` DataFrame and the ``client``
        object created in the Spark/OpenAI setup cell, so that cell must have
        been executed before this function is called.
    """
    # NOTE(review): assumes checkin_df exposes an "hour" column — TODO confirm
    hourly_counts = (
        checkin_df
        .filter(checkin_df.business_id == business_id)
        .groupBy("hour")
        .count()
        .toPandas()
    )

    prompt = f"""
    Based on these check-in patterns: {hourly_counts.to_dict()},
    provide 3 business optimization suggestions for business {business_id}.
    """

    # `openai.ChatCompletion` no longer exists in openai>=1.0, which this
    # notebook already requires for `from openai import OpenAI`. Go through
    # the configured client so the custom base_url is honoured as well.
    response = client.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=[{"role": "user", "content": prompt}]
    )
    return response.choices[0].message.content

    #by Mohamed

In [None]:
%pyspark
from pyspark.sql.functions import lit

def get_personalized_recommendations(user_id, latitude=None, longitude=None):
    """
    Combined recommendation system.

    Merges collaborative-filtering recommendations from the ALS ``model``
    with, when both coordinates are supplied, the five closest rows of
    ``restaurant_df``. Every recommended business gets a ChatGPT explanation.

    Args:
        user_id: User to recommend for; handed to the model in a one-row
            DataFrame under the ``user_id`` column.
        latitude: Optional latitude of the user. ``0`` is a valid value.
        longitude: Optional longitude of the user. ``0`` is a valid value.

    Returns:
        list[dict]: One ``{"business_id": ..., "explanation": ...}`` dict per
        recommended business: collaborative-filtering results first, then
        location-based results that were not already listed.
    """
    # Get collaborative filtering recommendations
    cf_recs = model.recommendForUserSubset(
        spark.createDataFrame([(user_id,)], ["user_id"]), 5
    )

    # recommendForUserSubset returns one row per user whose `recommendations`
    # column is an array of (business_id, rating) structs; the ids therefore
    # live inside that array, not on the row itself.
    business_ids = [
        item.business_id
        for row in cf_recs.collect()
        for item in row.recommendations
    ]

    # Get location-based recommendations. Compare against None so that a
    # coordinate of 0 (equator / prime meridian) is not treated as "missing".
    if latitude is not None and longitude is not None:
        loc_recs = restaurant_df.withColumn(
            "distance",
            calculate_distance(
                restaurant_df.latitude,
                restaurant_df.longitude,
                lit(latitude),
                lit(longitude)
            )
        ).orderBy("distance").limit(5)

        # Previously loc_recs was built but never consumed; fold the nearby
        # businesses into the result, skipping ones already recommended.
        for row in loc_recs.select("business_id").collect():
            if row.business_id not in business_ids:
                business_ids.append(row.business_id)

    # Combine and return results with ChatGPT explanations
    return [
        {
            "business_id": business_id,
            "explanation": generate_recommendation_explanation(user_id, business_id)
        }
        for business_id in business_ids
    ]

    #by Mohamed

In [0]:
%pyspark

from pyspark.sql import SparkSession
from pyspark.ml.feature import StringIndexer
from pyspark.ml.recommendation import ALS
from pyspark.sql.functions import *
import os

# Build (or reuse, via getOrCreate) the Spark session for this notebook:
# Hive support, 4g executor/driver memory, 50 shuffle partitions, Kryo
# serialization and the GraphFrames package.
# NOTE(review): confirm the GraphFrames coordinate matches the cluster's
# Spark/Scala version.
spark = (
    SparkSession.builder
    .appName("UnifiedRecSys")
    .config("spark.sql.warehouse.dir", "/user/hive/warehouse")
    .config("spark.executor.memory", "4g")
    .config("spark.driver.memory", "4g")
    .config("spark.sql.shuffle.partitions", "50")
    .config("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
    .config("spark.jars.packages", "graphframes:graphframes:0.8.3-spark3.1-s_2.12")
    .enableHiveSupport()
    .getOrCreate()
)

# Import GraphFrame AFTER Spark session is initialized
from graphframes import GraphFrame

# Configure your custom ChatGPT client
from openai import OpenAI

# The API key is read from the environment instead of being committed with
# the notebook; a KeyError here means OPENAI_API_KEY has not been exported.
# SECURITY: the key that used to be hardcoded in this cell has been exposed
# and should be revoked.
client = OpenAI(
    base_url='https://xiaoai.plus/v1',
    api_key=os.environ["OPENAI_API_KEY"]
)


#by RIDA

In [None]:
%pyspark
# Clean business data: keep only rows that have categories and both coordinates.
business_clean = (
    business_df
    .where(business_df["categories"].isNotNull())
    .where(business_df["latitude"].isNotNull())
    .where(business_df["longitude"].isNotNull())
)

# Create restaurant subset: categories must mention "Restaurants" and match
# at least one of the cuisines below (joined into a regex alternation).
restaurant_categories = ["American", "Mexican", "Italian", "Japanese", "Chinese"]
restaurant_df = (
    business_clean
    .where(business_clean["categories"].contains("Restaurants"))
    .where(business_clean["categories"].rlike("|".join(restaurant_categories)))
)

#by rida 

In [None]:
%pyspark
from pyspark.ml.recommendation import ALS

# Prepare review data: (user_id, business_id, rating) triples, with the
# review's `stars` column exposed as "rating".
# NOTE(review): Spark ALS requires numeric user/item ids; if review_df holds
# string ids they need to be indexed first (StringIndexer is imported in the
# setup cell but never used) — TODO confirm against the actual schema.
als_data = review_df.select(
    "user_id",
    "business_id",
    review_df.stars.alias("rating")
)

# Train ALS model. The seed is pinned because PySpark's default seed is
# derived from a per-process string hash, so an unseeded fit is not
# reproducible across kernel restarts.
als = ALS(
    maxIter=5,
    regParam=0.01,
    userCol="user_id",
    itemCol="business_id",
    ratingCol="rating",
    seed=42
)
model = als.fit(als_data)

#by rida

In [None]:
%pyspark
import openai

import os

# Initialize ChatGPT API
# The key is read from the environment instead of being committed with the
# notebook; a KeyError here means OPENAI_API_KEY has not been exported.
# SECURITY: the key that used to be hardcoded on this line has been exposed
# and should be revoked.
openai.api_key = os.environ["OPENAI_API_KEY"]

def generate_recommendation_explanation(user_id, business_id):
    """
    Generate natural language explanation for recommendations.

    Looks the business up in ``business_df``, describes it in a prompt
    (name, categories, stars, attributes) and returns ChatGPT's reply.

    Args:
        user_id: Identifier of the user, interpolated into the prompt.
        business_id: Value matched against ``business_df.business_id``.

    Returns:
        str: Text content of the first completion choice.

    Raises:
        ValueError: If no row of ``business_df`` matches ``business_id``.

    Notes:
        Uses the notebook-level ``business_df`` DataFrame and the ``client``
        object created in the Spark/OpenAI setup cell, so that cell must have
        been executed before this function is called.
    """
    business_info = business_df.filter(
        business_df.business_id == business_id
    ).first()

    # .first() yields None when nothing matches; fail with a clear message
    # rather than an AttributeError while formatting the prompt.
    if business_info is None:
        raise ValueError(f"Unknown business_id: {business_id!r}")

    prompt = f"""
    Explain why user {user_id} might like {business_info.name}, 
    a {business_info.categories} business with {business_info.stars} stars, 
    known for: {business_info.attributes}.
    """

    # `openai.ChatCompletion` no longer exists in openai>=1.0, which this
    # notebook already requires for `from openai import OpenAI`. Go through
    # the configured client so the custom base_url is honoured as well.
    response = client.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=[{"role": "user", "content": prompt}]
    )
    return response.choices[0].message.content

    #by aya

End