In [1]:
from pandasai import Agent
from pandasai.llm import OpenAI
import pandas as pd
import re
import numpy as np
import firebase_admin
from firebase_admin import credentials, firestore
import os

In [2]:
# Setup OpenAI key
os.environ["OPENAI_API_KEY"] = "sk-proj-psCCFjAmDWgtYmKfCSKxT3BlbkFJMK8cQo7orq1dGKeiFm6R"

In [3]:

# Initialize Firebase
cred = credentials.Certificate (r"C:\Users\dell\Downloads\vivid-5ive12elve-firebase-adminsdk-strt6-1598cd7023.json")
firebase_admin.initialize_app(cred)
db = firestore.client()

# Fetch data from Firebase
def fetch_data_from_firebase():
    docs = db.collection('Users').stream()
    data = []
    for doc in docs:
        doc_dict = doc.to_dict()
        if 'ProfessionAttributes' in doc_dict:
            for key, value in doc_dict.pop('ProfessionAttributes').items():
                doc_dict[key] = value
        data.append(doc_dict)
    df = pd.DataFrame(data)
    return df

df = fetch_data_from_firebase()

In [4]:


# Ensure numeric columns are converted to appropriate types
df['Height (In CM)'] = pd.to_numeric(df['Height (In CM)'], errors='coerce')
df['Weight'] = pd.to_numeric(df['Weight'], errors='coerce')
df['Age'] = pd.to_numeric(df['Age'], errors='coerce')
df['Salary'] = pd.to_numeric(df['Salary'], errors='coerce')

# Ensure Languages column is treated as strings
df['Languages (seperate by commas)'] = df['Languages (seperate by commas)'].astype(str).fillna('')

# List of columns relevant to the actor profession
actor_columns = [
    'FullName', 'Governorate', 'Phone', 'Profession', 'Age', 'Availablity', 'Eye color',
    'Hair Color', 'Height (In CM)', 'Languages (seperate by commas)', 'Salary', 'Special Skills', 'Title', 'Weight', 'ProfilePictureUrl'
]
# Similar definitions for other professions.
film_editor_columns = [
    'FullName', 'Governorate', 'Phone', 'Profession', 'Salary', 'Color Grading Skills',
    'Title', 'Editing Experience', 'Editing Software Proficiency', 'Filmmaking Experience',
    'Sound Editing and Mixing Skills', 'Special Effects and CGI Knowledge', 'Specialization', 'Years of Editing Experience'
]

first_ad_columns = [
    'FullName', 'Governorate', 'Phone', 'Profession', 'Availability Schedule', 'Director collaborations',
    'Experience', 'Preferred Work Geners', 'Production Credits', 'Qualifications', 'Salary', 'Skills', 'Title', 'Work Geners'
]

director_columns = [
    'FullName', 'Governorate', 'Phone', 'Profession', 'Cinematography Skills',
    'Communication Skills', 'Departments Knowledge', 'Collaboration Skills', 'Lenses and Cameras Knowledge', 'Production design', 'Salary',
    'Screenwriting Skills', 'Sound and Editing Skills', 'Title'
]

second_ac_columns = [
    'FullName', 'Governorate', 'Phone', 'Profession', 'Camera knowledge', 'DIT Collaboration',
    'Filming Experience', 'Lens Knowledge', 'On-set Problem-solving', 'Organizing skills', 'Qualifications',
    'Salary', 'Title'
]

# Ensure all_possible_columns contains unique column names
all_possible_columns = list(set(actor_columns + film_editor_columns + first_ad_columns + director_columns + second_ac_columns))

# Initialize DataFrame with all possible columns
df = df.reindex(columns=all_possible_columns)

In [5]:
llm = OpenAI()

In [6]:
def parse_gender(query):
    gender_keywords = {
        'female': ['female', 'woman', 'girl', 'lady', 'actress'],
        'male': ['male', 'man', 'boy', 'gentleman', 'actor']
    }
    for gender, keywords in gender_keywords.items():
        for keyword in keywords:
            if re.search(r'\b' + re.escape(keyword) + r'\b', query, re.IGNORECASE):
                return gender.capitalize()
    return None

def parse_height(query):
    height_match = re.search(r'height\s*(\d+)\s*-\s*(\d+)\s*cm', query, re.IGNORECASE)
    if height_match:
        return int(height_match.group(1)), int(height_match.group(2))
    height_single_match = re.search(r'height\s*(\d+)\s*cm', query, re.IGNORECASE)
    return (int(height_single_match.group(1)), int(height_single_match.group(1))) if height_single_match else None

def parse_weight(query):
    weight_match = re.search(r'weight\s*(\d+)\s*-\s*(\d+)\s*kg', query, re.IGNORECASE)
    if weight_match:
        return int(weight_match.group(1)), int(weight_match.group(2))
    weight_single_match = re.search(r'weight\s*(\d+)\s*kg', query, re.IGNORECASE)
    return (int(weight_single_match.group(1)), int(weight_single_match.group(1))) if weight_single_match else None
def parse_availability(query):
    availability_match = re.search(r'available\s*(part[-\s]*time|full[-\s]*time|only on weekend)', query, re.IGNORECASE)
    return availability_match.group(1).strip().lower() if availability_match else None

def parse_eye_color(query):
    eye_color_match = re.search(r'eye color\s*(\w+)', query, re.IGNORECASE)
    return eye_color_match.group(1).capitalize() if eye_color_match else None

def parse_languages(query):
    languages_match = re.search(r'(speak|languages)\s*([\w\s,]+)', query, re.IGNORECASE)
    if languages_match:
        languages = languages_match.group(2).split(',')
        return [lang.strip().capitalize() for lang in languages]
    return None

def parse_special_skills(query):
    skills_match = re.search(r'skills?\s*(with)?\s*([\w\s,]+)', query, re.IGNORECASE)
    if skills_match:
        skills = skills_match.group(2).split(',')
        return [skill.strip().capitalize() for skill in skills]
    return None

def parse_name(query):
    name_match = re.findall(r'\b([A-Z][a-z]* [A-Z][a-z]*)\b', query, re.IGNORECASE)
    if name_match:
        return ' '.join(name_match)
    return None

def parse_profession(query):
    profession_keywords = ['actor', 'actress', 'performer', 'film editor', '1st ad (assistant director)', 'director', '2nd ac (assistant camera)']
    for keyword in profession_keywords:
        if re.search(r'\b' + re.escape(keyword) + r'\b', query, re.IGNORECASE):
            return keyword.capitalize()
    return None
def parse_age(query):
    age_match = re.search(r'\b(\d+)\s*(?:years\s*old)?\b', query, re.IGNORECASE)
    return int(age_match.group(1)) if age_match else None

def parse_query(query):
    criteria = {}
    criteria['FullName'] = parse_name(query)
    criteria['Profession'] = parse_profession(query)
    criteria['Title'] = parse_gender(query)
    criteria['Age'] = parse_age(query)  # Update to parse age
    criteria['Height (In CM)'] = parse_height(query)
    criteria['Weight'] = parse_weight(query)
    criteria['Availablity'] = parse_availability(query)
    criteria['Eye color'] = parse_eye_color(query)
    criteria['Languages (seperate by commas)'] = parse_languages(query)
    criteria['Special Skills'] = parse_special_skills(query)
    return {k: v for k, v in criteria.items() if v is not None}

def apply_criteria(df, criteria):
    df = df.copy()

    if 'FullName' in criteria and criteria['FullName']:
        name = criteria.pop('FullName')
        name_match = df[df['FullName'].str.contains(name, case=False, na=False)]
        if not name_match.empty:
            profession = name_match['Profession'].iloc[0].lower()
            if profession == 'film editor':
                return name_match[film_editor_columns]
            elif profession == '1st ad (assistant director)':
                return name_match[first_ad_columns]
            elif profession == 'director':
                return name_match[director_columns]
            elif profession == '2nd ac (assistant camera)':
                return name_match[second_ac_columns]
            else:
                return name_match[actor_columns]

    if 'Profession' in criteria and criteria['Profession'] == 'Actor':
        df = df[df['Profession'].str.lower() == 'actor']

    if 'Title' in criteria:
        title = criteria.pop('Title')
        df = df[df['Title'].str.lower() == title.lower()]

    df['score'] = 0

    important_attributes = ['Availablity', 'Special Skills', 'Height (In CM)', 'Weight', 'Age']

    for column, value in criteria.items():
        weight = 1
        if column in important_attributes:
            weight = 3

        if column == 'Languages (seperate by commas)':
            for lang in value:
                if column in df:
                    df[column] = df[column].astype(str).fillna('')
                    df['score'] += np.where(df[column].str.contains(lang.strip(), case=False, na=False), weight, 0)
        elif column in ['Height (In CM)', 'Weight','Age']:
            if isinstance(value, tuple):  # Check if value is a tuple
                df['score'] += np.where((df[column] >= value[0]) & (df[column] <= value[1]), weight, 0)
            else:
                df['score'] += np.where(df[column] == value, weight, 0)  # Handle single value case
        elif column == 'Special Skills':
            for skill in value:
                if column in df:
                    df[column] = df[column].astype(str).fillna('')
                    df['score'] += np.where(df[column].str.contains(skill.strip(), case=False, na=False), weight, 0)
        else:
            if column in df:
                df[column] = df[column].astype(str).fillna('')
                df['score'] += np.where(df[column].str.lower() == value.lower(), weight, 0)

    # Debug print to check scores before sorting
    print("Scores before sorting:")
    print(df[['FullName', 'score']].sort_values(by='score', ascending=False).head(10))

    df_sorted = df.sort_values(by='score', ascending=False)

    exact_matches = df_sorted[df_sorted['score'] == len(criteria) * 3]  # Each attribute has a weight of 3

    if len(exact_matches) >= 3:
        return exact_matches[actor_columns].head(3)
    else:
        top_matches = df_sorted.head(3)
        return top_matches[actor_columns]


In [7]:
class CustomAgent:
    def __init__(self, df):
        self.df = df

    def search_by_name(self, name):
        user = self.df[self.df['FullName'].str.contains(name, case=False, na=False)]
        return user

    def chat(self, query):
        criteria = parse_query(query)
        print("Extracted Criteria:", criteria)

        if 'FullName' in criteria and len(criteria) == 1:
            user = self.search_by_name(criteria['FullName'])
            if not user.empty:
                profession = user['Profession'].iloc[0].lower()
                if profession == 'film editor':
                    return user[film_editor_columns]
                elif profession == '1st ad (assistant director)':
                    return user[first_ad_columns]
                elif profession == 'director':
                    return user[director_columns]
                elif profession == '2nd ac (assistant camera)':
                    return user[second_ac_columns]
                else:
                    return user[actor_columns]
            else:
                return "User not found"

        top_matches = apply_criteria(self.df, criteria)
        return top_matches

# Create an instance of the CustomAgent with the DataFrame
agent = CustomAgent(df)

In [8]:
# Create an instance of the CustomAgent with the DataFrame
agent = CustomAgent(df)
# Example query
response = agent.chat("Madonna Fahmy")
response

Extracted Criteria: {'FullName': 'Madonna Fahmy'}


Unnamed: 0,FullName,Governorate,Phone,Profession,Availability Schedule,Director collaborations,Experience,Preferred Work Geners,Production Credits,Qualifications,Salary,Skills,Title,Work Geners
1,Madonna Fahmy,Giza,1129321929,1st AD (Assistant Director),Fulltime,Christopher Nolan,2 Years,Drama,500000,Bachelors's degree in History,8000.0,Can work under pressure,Female,Sci-Fi


In [10]:
agent = CustomAgent(df)
response = agent.chat("looking for female actor her age  ")
response

Extracted Criteria: {'FullName': 'looking for a male actor with his special skills are available full', 'Profession': 'Actor', 'Title': 'Male', 'Age': 180, 'Height (In CM)': (180, 190), 'Availablity': 'full time', 'Special Skills': ['Are comedy', 'Available full time']}
Scores before sorting:
              FullName  score
56        Andrew Walid      7
65       Youssef Fouad      7
77         Khaled Omar      7
16              Marwan      4
31        Kareem Salah      4
64       Sameh Ibrahim      4
86       Yasmine Akram      4
92   Cristiano Ronaldo      4
95     Youssef Marzouk      4
103        Ahmed Samir      4


Unnamed: 0,FullName,Governorate,Phone,Profession,Age,Availablity,Eye color,Hair Color,Height (In CM),Languages (seperate by commas),Salary,Special Skills,Title,Weight,ProfilePictureUrl
56,Andrew Walid,Cairo,112918210,Actor,22.0,Full Time,Green,Black,184.0,Arabic,8000.0,Strength,Male,77.0,https://t3.ftcdn.net/jpg/03/64/62/36/360_F_364...
65,Youssef Fouad,Alexandria,111122334,Actor,29.0,Full Time,Brown,Black,182.0,"Arabic, English",7900.0,"Stand up comedy, Motorcycling",Male,78.0,
77,Khaled Omar,Cairo,1006550423,Actor,45.0,full time,brown,black,190.0,english,16000.0,"martial arts , stage combat",male,100.0,https://t3.ftcdn.net/jpg/03/64/62/36/360_F_364...


In [11]:
import pickle

In [11]:
with open ('agent_model','wb') as f:
  pickle.dump(agent,f)
 

In [12]:
with open ('agent_model','rb') as f:
    mp=pickle.load(f)

In [13]:
response = agent.chat("looking for female actor witj height 160-179CM , height 50-55 kg and her eye color blue and speak english and french and must be available full time and special skills are sigining and dancing ")


Extracted Criteria: {'FullName': 'looking for a male actor with his special skills are available full', 'Profession': 'Actor', 'Title': 'Male', 'Age': 180, 'Height (In CM)': (180, 190), 'Availablity': 'full time', 'Special Skills': ['Are comedy', 'Available full time']}
Scores before sorting:
              FullName  score
55        Andrew Walid      7
64       Youssef Fouad      7
76         Khaled Omar      7
15              Marwan      4
30        Kareem Salah      4
63       Sameh Ibrahim      4
85       Yasmine Akram      4
91   Cristiano Ronaldo      4
94     Youssef Marzouk      4
101        Ahmed Samir      4


Unnamed: 0,FullName,Governorate,Phone,Profession,Age,Availablity,Eye color,Hair Color,Height (In CM),Languages (seperate by commas),Salary,Special Skills,Title,Weight,ProfilePictureUrl
55,Andrew Walid,Cairo,112918210,Actor,22.0,Full Time,Green,Black,184.0,Arabic,8000.0,Strength,Male,77.0,https://t3.ftcdn.net/jpg/03/64/62/36/360_F_364...
64,Youssef Fouad,Alexandria,111122334,Actor,29.0,Full Time,Brown,Black,182.0,"Arabic, English",7900.0,"Stand up comedy, Motorcycling",Male,78.0,
76,Khaled Omar,Cairo,1006550423,Actor,45.0,full time,brown,black,190.0,english,16000.0,"martial arts , stage combat",male,100.0,https://t3.ftcdn.net/jpg/03/64/62/36/360_F_364...


In [13]:
agent = CustomAgent(df)
response = agent.chat("looking for female actor witj height 160-179CM , height 50-55 kg and her eye color blue and speak english and french and must be available full time and special skills are sigining and dancing ")
response

In [14]:
agent = CustomAgent(df)
response = agent.chat("looking for female actor with height 160-175CM , height 50-55kg and her eye color blue and speak english and french and must be available full-time and her specail skills are singing and dancing")
response

Extracted Criteria: {'FullName': 'looking for female actor with height and her eye color blue and speak english and french and must be available time and her specail skills are singing and', 'Profession': 'Actor', 'Title': 'Female', 'Age': 160, 'Height (In CM)': (160, 175), 'Availablity': 'full-time', 'Eye color': 'Blue', 'Languages (seperate by commas)': ['English and french and must be available full'], 'Special Skills': ['Are singing and dancing']}
Scores before sorting:
           FullName  score
25      Mona Rashid      5
101    Rowan Khaled      5
96   Yasmin Mohamed      5
45    Yasmine Salah      5
87     Salma Hassan      5
4       Amira Nader      4
81       Ruby Samir      4
107   Menna Mahmoud      4
99      Sarah Akram      4
91       Farida Ali      4


Unnamed: 0,FullName,Governorate,Phone,Profession,Age,Availablity,Eye color,Hair Color,Height (In CM),Languages (seperate by commas),Salary,Special Skills,Title,Weight,ProfilePictureUrl
25,Mona Rashid,Luxor,153344556,Actor,25.0,Full Time,Blue,Red,160.0,"Arabic, German",6800.0,"Dancing, Acting, Cooking",Female,57.0,
101,Rowan Khaled,Gharbia,1211001190,Actor,20.0,Full Time,Blue,Blonde,170.0,French,10000.0,Gymnastics,Female,60.0,https://t3.ftcdn.net/jpg/03/64/62/36/360_F_364...
96,Yasmin Mohamed,Matrouh,1167956076,Actor,22.0,full time,blue,blonde,168.0,"French, Arabic, English",12000.0,"proficient in playing musical instruments, ski...",female,50.0,https://t3.ftcdn.net/jpg/03/64/62/36/360_F_364...
