In [None]:
from dotenv import load_dotenv
import os
import openai

# Charger les variables d'environnement depuis un fichier .env
load_dotenv()

# Récupérer la clé API directement depuis les variables d'environnement
openai.api_key = os.getenv("OPENAI_API_KEY")
from openai import OpenAI
import pandas as pd 
import geopy
from geopy.distance import geodesic
from geopy.geocoders import Nominatim
import certifi
import ssl 
from geopy.exc import GeocoderTimedOut
import time
import numpy as np
import json

from unidecode import unidecode

client = OpenAI()

In [3]:
def get_lat_long(location_name):
    ssl_context = ssl.create_default_context(cafile=certifi.where())
    geolocator = Nominatim(user_agent="france_zip_code_locator", ssl_context=ssl_context)
    try:
        location = geolocator.geocode(f"{location_name}, France")
        if location:
            return location.latitude, location.longitude
    except GeocoderTimedOut:
        print("Geocoding service timed out.")
    return None

with open('postal_code_coordinates.json', 'r') as file: #lire
    postal_code_coordinates = json.load(file)

def calculate_distance(row, client_location):
    vehicle_coords = postal_code_coordinates.get(row['ville'])
    if not vehicle_coords or len(vehicle_coords) != 2:
        # Gérer les cas où les coordonnées sont manquantes ou incorrectes
        return float('inf')
    
    return geodesic(client_location, vehicle_coords).kilometers

In [22]:
df = pd.read_csv('../sandbox/mary/myvo/vo_vehicle (1).csv', encoding="ISO-8859-1", sep=";")

In [39]:
df.groupby(["marque", "modele"]).count()["id"].reset_index().sort_values(by="id", ascending=False).head(30)

Unnamed: 0,marque,modele,id
179,PEUGEOT,208,462
176,PEUGEOT,2008,382
20,CITROEN,C3,278
189,PEUGEOT,308,216
185,PEUGEOT,3008,184
222,RENAULT,CLIO V,141
219,RENAULT,CAPTUR,122
184,PEUGEOT,208 ELECTRIQUE,95
21,CITROEN,C3 AIRCROSS,87
52,DACIA,SANDERO,75


In [84]:
response = client.chat.completions.create(messages=[{"role": "system", "content": "Réponds à la description suivante avec un JSON comportant la clé 'vehicle' qui correspond à une liste de dictionnaires comportant chacun une clé 'brand', une clé 'model' et une clé 'version'. Tu ne précises la version que si elle est explicitement présente dans la description (null sinon). Parmi tes recommandations, tu dois lorsque c'est possible proposer en priorité des modèles Peugeot, Renault, Citroën, Dacia. Description : twingo électrique"}], model="gpt-4o-mini", response_format={"type": "json_object"})

In [85]:
json.loads(response.choices[0].message.content)

{'vehicle': [{'brand': 'Renault', 'model': 'Twingo', 'version': 'Électrique'},
  {'brand': 'Peugeot', 'model': 'e-208', 'version': None},
  {'brand': 'Citroën', 'model': 'e-C4', 'version': None},
  {'brand': 'Dacia', 'model': 'Spring', 'version': None}]}

In [140]:
def recommender(full_description: str, 
                zip_code: str,
                max_mileage: int = None, 
                max_price: int = None, 
                brand: str = None, 
                model: str = None, 
                version: str = None, 
                energy: str = None,
                gearbox: str = None,
                seats_number: int = None,
                color: str = None):
    
    df = pd.read_csv('../sandbox/mary/myvo/vo_vehicle (1).csv', encoding="ISO-8859-1", sep=";")
    
    if brand:
        filt = df['marque'] == unidecode(brand).upper()
        if model:
            filt &= df['modele'] == unidecode(model).upper()
    
    else:
        llm_call = client.chat.completions.create(messages=[{"role": "system", "content": f"Réponds à la description suivante avec un JSON comportant la clé 'vehicles' qui correspond à une liste de dictionnaires comportant chacun une clé 'brand', une clé 'model' et une clé 'version'. Tu ne précises la version que si elle est explicitement présente dans la description (null sinon). Parmi tes recommandations, tu dois lorsque c'est possible proposer en priorité des modèles Peugeot, Renault, Citroën, Dacia. Description : {full_description}"}], model="gpt-4o-mini", response_format={"type": "json_object"})
        eligible_vehicles = json.loads(llm_call.choices[0].message.content).get("vehicles", [])
        print(eligible_vehicles)
        eligible_brands = [unidecode(vehicle.get("brand")).upper() for vehicle in eligible_vehicles]
        eligible_models = [unidecode(vehicle.get("model")).upper() for vehicle in eligible_vehicles]

        filt = (df['marque'].isin(eligible_brands)) & (df['modele'].isin(eligible_models))

    if max_mileage:
        filt &= df['kilometres'] <= max_mileage * 1.05

    if max_price:
        filt &= df['prix_ttc'] <= max_price * 1.1
    
    if energy:
        filt &= df['energie'] == energy
    
    if gearbox:
        filt &= df['type_boite'] == gearbox
    
    if seats_number:
        filt &= df['nb_places'] == seats_number
    
    if color:
        filt &= df['couleur'] == color

    df_filtered = df[filt]
    
    df_filtered['ville'] = df_filtered["ville"].apply(lambda x: "0"*(5 - len(str(x))) + str(x))
    client_location = get_lat_long(zip_code)
    df_filtered['distance'] = df_filtered.apply(lambda row: calculate_distance(row, client_location), axis=1)

    try:
        third_smallest_distance = sorted(df_filtered['distance'].unique())[2]
    except:
        third_smallest_distance = df_filtered['distance'].min()

    distance_filts = [df_filtered[df_filtered['distance'] == df_filtered['distance'].min()], df_filtered[df_filtered['distance'] <= third_smallest_distance], df_filtered[df_filtered['distance'] <= 50], df_filtered]

    for _, intent in enumerate(distance_filts):

        if intent.shape[0] > 2:
            break

    return intent[['ville', 'distance', 'marque', 'modele', 'prix_ttc']]


In [151]:
recommender("voiture électrique", zip_code="14000")

[{'brand': 'Peugeot', 'model': 'e-208', 'version': None}, {'brand': 'Renault', 'model': 'Zoe', 'version': None}, {'brand': 'Citroën', 'model': 'ë-C4', 'version': None}, {'brand': 'Dacia', 'model': 'Spring', 'version': None}]


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_filtered['ville'] = df_filtered["ville"].apply(lambda x: "0"*(5 - len(str(x))) + str(x))
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_filtered['distance'] = df_filtered.apply(lambda row: calculate_distance(row, client_location), axis=1)


Unnamed: 0,ville,distance,marque,modele,prix_ttc
1261,14000,0.0,CITROEN,E-C4,23990
1262,14000,0.0,CITROEN,E-C4,23990
1263,14000,0.0,CITROEN,E-C4,23790
1264,14000,0.0,CITROEN,E-C4,23490
1265,14000,0.0,CITROEN,E-C4,23990
1267,14000,0.0,CITROEN,E-C4,23990
1268,14000,0.0,CITROEN,E-C4,23990
1269,14000,0.0,CITROEN,E-C4,23990
1272,14000,0.0,CITROEN,E-C4,23990
2199,14000,0.0,CITROEN,E-C4,23990
