In [None]:
import os
import sys
import google.generativeai as genai

# Check Python version
if sys.version_info < (3, 9):
    print("This script requires Python 3.9 or higher")
    sys.exit(1)

# Print library version for debugging
print(f"Google GenerativeAI version: {genai.__version__}")

# Configure the API key.
# SECURITY: the key is read from the GOOGLE_API_KEY environment variable
# instead of being embedded in the source. Any key that was ever stored in
# plain text in this file must be considered compromised and revoked.
api_key = os.environ.get("GOOGLE_API_KEY")
if not api_key:
    print("Set the GOOGLE_API_KEY environment variable before running this script")
    sys.exit(1)
genai.configure(api_key=api_key)

def list_available_models():
    """Print the name of every model reported by ``genai.list_models()``.

    Names are printed one per line as they are received. If anything goes
    wrong, the error is printed and the process exits with status 1.
    """
    try:
        available = genai.list_models()
        for model_info in available:
            print(model_info.name)
    except Exception as err:
        print(f"Error listing models: {err}")
        sys.exit(1)

def chat_loop():
    """Run an interactive console chat with the 'gemini-pro' model.

    A French context prompt is sent first and the model's reply is printed.
    The function then reads user messages in a loop until the user types
    'quit' or 'exit', interrupts the prompt (Ctrl-C / end of input), or a
    chat request fails. Errors are printed rather than raised.
    """
    try:
        # Configure the generation parameters
        # NOTE(review): this configuration is built but never passed to the
        # model or to send_message(), so it currently has no effect. Before
        # wiring it in (e.g. GenerativeModel(..., generation_config=...)),
        # confirm that these values are accepted by the 'gemini-pro' model.
        generation_config = genai.GenerationConfig(
            temperature=1.0,
            top_p=0.95,
            top_k=64,
            max_output_tokens=8192,
        )

        # Initialize the model
        model = genai.GenerativeModel('gemini-pro')

        # Start the chat
        chat = model.start_chat(history=[])

        # Initial context setting
        initial_prompt = """vous êtes un chatbot d'assistance amical de la société Lapin Malin.
        Votre objectif principal est de recommander une solution au problème de maintenance matérielle
        ou logicielle de l'ordinateur des utilisateurs"""

        response = chat.send_message(initial_prompt)
        print("\nBot:", response.text)

        print("\nChat initialized. Type 'quit' or 'exit' to end the conversation.")
        while True:
            # Ctrl-C / EOF are not subclasses of Exception (KeyboardInterrupt)
            # or are easy to hit at a prompt (EOFError); treat both as a
            # normal request to leave instead of surfacing a traceback.
            try:
                user_input = input("\nYou: ").strip()
            except (EOFError, KeyboardInterrupt):
                print("\nEnding chat session...")
                break

            # Nothing to send: ask again rather than issuing an empty request.
            if not user_input:
                continue

            if user_input.lower() in ('quit', 'exit'):
                print("Ending chat session...")
                break

            try:
                response = chat.send_message(user_input)
                print(f"\nBot: {response.text}")
            except Exception as e:
                print(f"Error in chat: {str(e)}")
                break

    except Exception as e:
        print(f"Initialization error: {str(e)}")

# Script entry point: print the available model names, then start the
# interactive chat.
if __name__ == "__main__":
    list_available_models()
    chat_loop()

Google GenerativeAI version: 0.8.3
models/chat-bison-001
models/text-bison-001
models/embedding-gecko-001
models/gemini-1.0-pro-latest
models/gemini-1.0-pro
models/gemini-pro
models/gemini-1.0-pro-001
models/gemini-1.0-pro-vision-latest
models/gemini-pro-vision
models/gemini-1.5-pro-latest
models/gemini-1.5-pro-001
models/gemini-1.5-pro-002
models/gemini-1.5-pro
models/gemini-1.5-pro-exp-0801
models/gemini-1.5-pro-exp-0827
models/gemini-1.5-flash-latest
models/gemini-1.5-flash-001
models/gemini-1.5-flash-001-tuning
models/gemini-1.5-flash
models/gemini-1.5-flash-exp-0827
models/gemini-1.5-flash-002
models/gemini-1.5-flash-8b
models/gemini-1.5-flash-8b-001
models/gemini-1.5-flash-8b-latest
models/gemini-1.5-flash-8b-exp-0827
models/gemini-1.5-flash-8b-exp-0924
models/embedding-001
models/text-embedding-004
models/aqa

Bot: 🌟 Salut ! Je suis Jojo, le chatbot d'assistance de Lapin Malin. Je suis là pour vous aider avec tous vos problèmes informatiques. 🌟

Vous rencontrez des difficultés a

KeyboardInterrupt: Interrupted by user

In [None]:
!pip install sentence_transformers

Collecting sentence_transformers
  Downloading sentence_transformers-3.2.1-py3-none-any.whl.metadata (10 kB)
Downloading sentence_transformers-3.2.1-py3-none-any.whl (255 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m255.8/255.8 kB[0m [31m3.7 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: sentence_transformers
Successfully installed sentence_transformers-3.2.1


In [None]:
import pandas as pd
from geopy.geocoders import Nominatim
from geopy.distance import geodesic
import numpy as np
from functools import lru_cache
import pickle
import os
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity

class ExpertRecommender:
    """Rank experts for a user by proximity, availability and domain similarity.

    The expert table is read from a ';'-separated CSV file. The columns used
    are 'Nom', 'Pays', 'Ville', 'Domaine' and
    'Disponibilité (heures/semaine)'. Domain similarity is the cosine
    similarity between sentence embeddings of the user's domain and each
    expert's domain.
    """

    def __init__(self, data_csv):
        """Load the expert table, the embedding model and the coordinate cache.

        Args:
            data_csv: Path to the ';'-separated CSV file describing the experts.
        """
        self.df = pd.read_csv(data_csv, sep=';')
        self.geolocator = Nominatim(user_agent="expert_recommender")

        # Sentence-embedding model used for semantic similarity.
        self.model = SentenceTransformer('all-MiniLM-L6-v2')

        # Embed every distinct expert domain once, up front.
        self.domain_embeddings = self._precalculate_domain_embeddings()

        # Locations whose geocoding failed during this session. Kept in memory
        # only, so they are retried on the next run but not on every call.
        self._unresolved_locations = set()

        # Load the persisted coordinate cache if one exists.
        self.cache_file = 'city_coords_cache.pkl'
        if os.path.exists(self.cache_file):
            # SECURITY: unpickling can execute arbitrary code. Only load a
            # cache file that this program wrote itself.
            with open(self.cache_file, 'rb') as f:
                self.city_coords = pickle.load(f)
        else:
            self.city_coords = {}

        # Resolve (and persist) the coordinates of every expert location.
        self._precalculate_expert_coordinates()

    def _precalculate_domain_embeddings(self):
        """Return a dict mapping each distinct 'Domaine' value to its embedding."""
        unique_domains = self.df['Domaine'].unique()
        embeddings = self.model.encode(unique_domains)
        return dict(zip(unique_domains, embeddings))

    def _calculate_domain_similarity(self, user_domain):
        """Return the cosine similarity of ``user_domain`` to each expert's domain.

        The similarity is computed once per distinct domain and then mapped
        back onto the rows of the expert table.

        Returns:
            A NumPy array with one score per row of ``self.df``, in row order.
        """
        if not self.domain_embeddings:
            return np.zeros(len(self.df))

        user_embedding = self.model.encode(user_domain)

        domains = list(self.domain_embeddings)
        domain_matrix = np.vstack([self.domain_embeddings[d] for d in domains])
        scores = cosine_similarity(user_embedding.reshape(1, -1), domain_matrix)[0]
        score_by_domain = dict(zip(domains, scores))

        return np.array([score_by_domain[domain] for domain in self.df['Domaine']])

    def _precalculate_expert_coordinates(self):
        """Geocode every distinct (Ville, Pays) pair and save the cache to disk."""
        unique_locations = self.df[['Ville', 'Pays']].drop_duplicates()
        for city, country in zip(unique_locations['Ville'], unique_locations['Pays']):
            self._get_coordinates(city, country)

        # Persist the cache so later runs can skip the geocoding requests.
        with open(self.cache_file, 'wb') as f:
            pickle.dump(self.city_coords, f)

    def _get_coordinates(self, city, country):
        """Return ``(latitude, longitude)`` for a city, or None if unresolved.

        Successful lookups are stored in ``self.city_coords`` (the dict that is
        persisted to disk). Failed lookups are remembered for the lifetime of
        this instance only. The instance-level caches replace a
        ``functools.lru_cache`` decorator, which on a method keys on ``self``
        and keeps the instance alive for as long as the cache exists.
        """
        location_key = f"{city}, {country}"

        cached = self.city_coords.get(location_key)
        if cached is not None:
            return cached
        if location_key in self._unresolved_locations:
            return None

        try:
            location = self.geolocator.geocode(location_key)
        except Exception:
            # Best effort: a geocoding failure means "unknown location".
            # Unlike a bare ``except:``, this lets Ctrl-C and SystemExit through.
            location = None

        if not location:
            self._unresolved_locations.add(location_key)
            return None

        coords = (location.latitude, location.longitude)
        self.city_coords[location_key] = coords
        return coords

    def _calculate_distances_vectorized(self, user_coords):
        """Return a proximity score in [0, 1] for every expert.

        The score falls linearly from 1 at 0 km to 0 at 5000 km or more.
        Experts whose location cannot be resolved score 0. If ``user_coords``
        is falsy, every expert scores 0.
        """
        if not user_coords:
            return np.zeros(len(self.df))

        distances = []
        for city, country in zip(self.df['Ville'], self.df['Pays']):
            expert_coords = self._get_coordinates(city, country)
            if expert_coords:
                distance = geodesic(expert_coords, user_coords).kilometers
            else:
                # Infinite distance clamps to a score of 0 below.
                distance = float('inf')
            distances.append(distance)

        max_distance = 5000
        return np.maximum(0, 1 - (np.array(distances) / max_distance))

    def get_recommendations(self, user_city, user_country, user_domain, weights=None, top_n=20):
        """Return the best-scoring experts for a user.

        Args:
            user_city: City of the user.
            user_country: Country of the user.
            user_domain: Free-text description of the domain the user needs.
            weights: Optional dict with the keys 'distance', 'availability'
                and 'domain'. Defaults to 0.4 / 0.2 / 0.7.
            top_n: Maximum number of experts to return (default 20).

        Returns:
            A list of dicts sorted by decreasing 'score', with the keys 'nom',
            'pays', 'ville', 'domaine', 'disponibilite', 'score_domaine' and
            'score'.
        """
        if weights is None:
            # NOTE(review): these defaults sum to 1.3, so the total score can
            # exceed 1 -- confirm this is intended.
            weights = {
                'distance': 0.4,
                'availability': 0.2,
                'domain': 0.7
            }

        # Resolve the user's coordinates once.
        user_coords = self._get_coordinates(user_city, user_country)

        # Per-expert component scores.
        distance_scores = self._calculate_distances_vectorized(user_coords)
        # NOTE(review): 15 is presumably the maximum weekly availability in
        # the data set -- confirm.
        availability_scores = self.df['Disponibilité (heures/semaine)'].values / 15
        domain_scores = self._calculate_domain_similarity(user_domain)

        # Weighted sum of the three components.
        total_scores = (
            distance_scores * weights['distance'] +
            availability_scores * weights['availability'] +
            domain_scores * weights['domain']
        )

        # Assemble a DataFrame so the ranking can be done by pandas.
        results_df = pd.DataFrame({
            'nom': self.df['Nom'],
            'pays': self.df['Pays'],
            'ville': self.df['Ville'],
            'domaine': self.df['Domaine'],
            'disponibilite': self.df['Disponibilité (heures/semaine)'],
            'score_domaine': domain_scores,
            'score': total_scores
        })

        # Keep the top_n highest total scores.
        top_recommendations = results_df.nlargest(top_n, 'score')

        return top_recommendations.to_dict('records')

# Example usage:
if __name__ == "__main__":
    separator = "-" * 50

    recommender = ExpertRecommender("user.csv")
    recommendations = recommender.get_recommendations(
        user_city="Garoua",
        user_country="Cameroon",
        user_domain="Artificial Intelligence| Database Administrator"
    )

    # Header, then one block of lines per recommended expert.
    print("\nRecommandations d'experts:")
    print(separator)
    for rank, rec in enumerate(recommendations, start=1):
        print(
            f"{rank}. {rec['nom']}",
            f"   Localisation: {rec['ville']}, {rec['pays']}",
            f"   Domaine: {rec['domaine']}",
            f"   Similarité du domaine: {rec['score_domaine']:.2f}",
            f"   Disponibilité: {rec['disponibilite']} heures/semaine",
            f"   Score total: {rec['score']:.2f}",
            separator,
            sep="\n",
        )




Recommandations d'experts:
--------------------------------------------------
1. Thierry Ngan
   Localisation: Garoua, Cameroon
   Domaine: IoT Development | Artificial Intelligence| Database Administrator| Web Development
   Similarité du domaine: 0.59
   Disponibilité: 12 heures/semaine
   Score total: 0.97
--------------------------------------------------
2. Martin Fonkem
   Localisation: Bamenda, Cameroon
   Domaine: Data & Business Intelligence | Artificial Intelligence
   Similarité du domaine: 0.64
   Disponibilité: 12 heures/semaine
   Score total: 0.97
--------------------------------------------------
3. Wilfried Ngan
   Localisation: Yaoundé, Cameroon
   Domaine: Cyber-security & Network | Database Administrator
   Similarité du domaine: 0.60
   Disponibilité: 15 heures/semaine
   Score total: 0.97
--------------------------------------------------
4. Tolulope Adebayo
   Localisation: Ibadan, Nigeria
   Domaine: Data & Business Intelligence | Artificial Intelligence
   Sim

In [None]:
import pandas as pd
from geopy.geocoders import Nominatim
from geopy.distance import geodesic
import numpy as np
from functools import lru_cache
import pickle
import os
from transformers import pipeline

class ExpertRecommender:
    """Rank experts for a user by proximity, availability and domain match.

    The expert table is read from a ';'-separated CSV file. The columns used
    are 'Nom', 'Pays', 'Ville', 'Domaine' and
    'Disponibilité (heures/semaine)'. The domain match is scored with a
    zero-shot classification pipeline whose candidate labels are the
    experts' domains.
    """

    def __init__(self, data_csv):
        """Load the expert table, the classifier and the coordinate cache.

        Args:
            data_csv: Path to the ';'-separated CSV file describing the experts.
        """
        self.df = pd.read_csv(data_csv, sep=';')
        self.geolocator = Nominatim(user_agent="expert_recommender")

        # Zero-shot classification pipeline used to score domains.
        self.classifier = pipeline("zero-shot-classification",
                                 model='amaye15/Stack-Overflow-Zero-Shot-Classification')

        # Locations whose geocoding failed during this session. Kept in memory
        # only, so they are retried on the next run but not on every call.
        self._unresolved_locations = set()

        # Load the persisted coordinate cache if one exists.
        self.cache_file = 'city_coords_cache.pkl'
        if os.path.exists(self.cache_file):
            # SECURITY: unpickling can execute arbitrary code. Only load a
            # cache file that this program wrote itself.
            with open(self.cache_file, 'rb') as f:
                self.city_coords = pickle.load(f)
        else:
            self.city_coords = {}

        # Resolve (and persist) the coordinates of every expert location.
        self._precalculate_expert_coordinates()

    def _calculate_domain_similarity(self, user_domain):
        """Score ``user_domain`` against every expert's domain.

        The distinct 'Domaine' values are used as candidate labels of a
        multi-label zero-shot classification of ``user_domain``.

        Returns:
            A NumPy array with one score per row of ``self.df``, in row order.
            Domains missing from the classifier result score 0.0.
        """
        unique_domains = self.df['Domaine'].unique().tolist()

        result = self.classifier(
            sequences=user_domain,
            candidate_labels=unique_domains,
            multi_label=True
        )

        # Map each label back to its score, then onto the expert rows.
        domain_scores = dict(zip(result['labels'], result['scores']))
        return np.array([domain_scores.get(domain, 0.0) for domain in self.df['Domaine']])

    def _precalculate_expert_coordinates(self):
        """Geocode every distinct (Ville, Pays) pair and save the cache to disk."""
        unique_locations = self.df[['Ville', 'Pays']].drop_duplicates()
        for city, country in zip(unique_locations['Ville'], unique_locations['Pays']):
            self._get_coordinates(city, country)

        # Persist the cache so later runs can skip the geocoding requests.
        with open(self.cache_file, 'wb') as f:
            pickle.dump(self.city_coords, f)

    def _get_coordinates(self, city, country):
        """Return ``(latitude, longitude)`` for a city, or None if unresolved.

        Successful lookups are stored in ``self.city_coords`` (the dict that is
        persisted to disk). Failed lookups are remembered for the lifetime of
        this instance only. The instance-level caches replace a
        ``functools.lru_cache`` decorator, which on a method keys on ``self``
        and keeps the instance alive for as long as the cache exists.
        """
        location_key = f"{city}, {country}"

        cached = self.city_coords.get(location_key)
        if cached is not None:
            return cached
        if location_key in self._unresolved_locations:
            return None

        try:
            location = self.geolocator.geocode(location_key)
        except Exception:
            # Best effort: a geocoding failure means "unknown location".
            # Unlike a bare ``except:``, this lets Ctrl-C and SystemExit through.
            location = None

        if not location:
            self._unresolved_locations.add(location_key)
            return None

        coords = (location.latitude, location.longitude)
        self.city_coords[location_key] = coords
        return coords

    def _calculate_distances_vectorized(self, user_coords):
        """Return a proximity score in [0, 1] for every expert.

        The score falls linearly from 1 at 0 km to 0 at 5000 km or more.
        Experts whose location cannot be resolved score 0. If ``user_coords``
        is falsy, every expert scores 0.
        """
        if not user_coords:
            return np.zeros(len(self.df))

        distances = []
        for city, country in zip(self.df['Ville'], self.df['Pays']):
            expert_coords = self._get_coordinates(city, country)
            if expert_coords:
                distance = geodesic(expert_coords, user_coords).kilometers
            else:
                # Infinite distance clamps to a score of 0 below.
                distance = float('inf')
            distances.append(distance)

        max_distance = 5000
        return np.maximum(0, 1 - (np.array(distances) / max_distance))

    def get_recommendations(self, user_city, user_country, user_domain, weights=None, top_n=20):
        """Return the best-scoring experts for a user.

        Args:
            user_city: City of the user.
            user_country: Country of the user.
            user_domain: Free-text description of the domain the user needs.
            weights: Optional dict with the keys 'distance', 'availability'
                and 'domain'. Defaults to 0.3 / 0.2 / 0.5.
            top_n: Maximum number of experts to return (default 20).

        Returns:
            A list of dicts sorted by decreasing 'score', with the keys 'nom',
            'pays', 'ville', 'domaine', 'disponibilite', 'score_domaine' and
            'score'.
        """
        if weights is None:
            weights = {
                'distance': 0.3,
                'availability': 0.2,
                'domain': 0.5
            }

        # Resolve the user's coordinates once.
        user_coords = self._get_coordinates(user_city, user_country)

        # Per-expert component scores.
        distance_scores = self._calculate_distances_vectorized(user_coords)
        # NOTE(review): 15 is presumably the maximum weekly availability in
        # the data set -- confirm.
        availability_scores = self.df['Disponibilité (heures/semaine)'].values / 15
        domain_scores = self._calculate_domain_similarity(user_domain)

        # Weighted sum of the three components.
        total_scores = (
            distance_scores * weights['distance'] +
            availability_scores * weights['availability'] +
            domain_scores * weights['domain']
        )

        # Assemble a DataFrame so the ranking can be done by pandas.
        results_df = pd.DataFrame({
            'nom': self.df['Nom'],
            'pays': self.df['Pays'],
            'ville': self.df['Ville'],
            'domaine': self.df['Domaine'],
            'disponibilite': self.df['Disponibilité (heures/semaine)'],
            'score_domaine': domain_scores,
            'score': total_scores
        })

        # Keep the top_n highest total scores.
        top_recommendations = results_df.nlargest(top_n, 'score')

        return top_recommendations.to_dict('records')

# Example usage:
if __name__ == "__main__":
    separator = "-" * 50

    recommender = ExpertRecommender("user.csv")
    recommendations = recommender.get_recommendations(
        user_city="Limbe",
        user_country="Cameroon",
        user_domain="Software Engineering"
    )

    # Header, then one block of lines per recommended expert.
    print("\nRecommandations d'experts:")
    print(separator)
    for rank, rec in enumerate(recommendations, start=1):
        print(
            f"{rank}. {rec['nom']}",
            f"   Localisation: {rec['ville']}, {rec['pays']}",
            f"   Domaine: {rec['domaine']}",
            f"   Similarité du domaine: {rec['score_domaine']:.2f}",
            f"   Disponibilité: {rec['disponibilite']} heures/semaine",
            f"   Score total: {rec['score']:.2f}",
            separator,
            sep="\n",
        )


Recommandations d'experts:
--------------------------------------------------
1. **Germain Ngan
   Localisation: Kumba, Cameroon
   Domaine: Software Engineering
   Similarité du domaine: 1.00
   Disponibilité: 15 heures/semaine
   Score total: 1.00
--------------------------------------------------
2. **Paul Ngan
   Localisation: Limbe, Cameroon
   Domaine: Software Engineering
   Similarité du domaine: 1.00
   Disponibilité: 10 heures/semaine
   Score total: 0.93
--------------------------------------------------
3. Fotsing Engoulou
   Localisation: Garoua, Cameroon
   Domaine: Software Engineering
   Similarité du domaine: 1.00
   Disponibilité: 13 heures/semaine
   Score total: 0.93
--------------------------------------------------
4. **Fotsing Engoulou
   Localisation: Garoua, Cameroon
   Domaine: Software Engineering
   Similarité du domaine: 1.00
   Disponibilité: 13 heures/semaine
   Score total: 0.93
--------------------------------------------------
5. Zainab Ahmed
   Local

In [None]:
from transformers import pipeline
# Alternative model, kept for reference (disabled):
#cl=pipeline("zero-shot-classification", model="michaelp11/zeroshot-classification-de")
# Zero-shot classification pipeline; the cells below call it as `cl(texts, labels)`.
cl=pipeline("zero-shot-classification", model='amaye15/Stack-Overflow-Zero-Shot-Classification')



Error while fetching `HF_TOKEN` secret value from your vault: 'Requesting secret HF_TOKEN timed out. Secrets can only be fetched when running from the Colab UI.'.
You are not authenticated with the Hugging Face Hub in this notebook.
If the error persists, please let us know by opening an issue on GitHub (https://github.com/huggingface/huggingface_hub/issues/new).


config.json:   0%|          | 0.00/1.03k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/1.74G [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/1.36k [00:00<?, ?B/s]

spm.model:   0%|          | 0.00/2.46M [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/8.65M [00:00<?, ?B/s]

added_tokens.json:   0%|          | 0.00/23.0 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/173 [00:00<?, ?B/s]

In [None]:
import pandas as pd
# Load the expert table, using the first CSV column as the index.
expert = pd.read_csv('experts1.csv', index_col=0)

# Build one text per expert: skills and projects joined by a '.'.
skills, projects = expert['competences'], expert['projets']
expert['desc'] = skills + '.' + projects

# Preview the first description.
expert['desc'].iloc[0]

"machine learning;deep learning;python;data analysis.Prédiction de fraudes bancaires;Classification d'images satellites"

In [None]:
from sklearn.feature_extraction.text import TfidfVectorizer
# TF-IDF vectorizer over unigrams and bigrams.
vectorizer = TfidfVectorizer(ngram_range=(1,2))

# Fit on the expert descriptions; one row per expert.
tfidf = vectorizer.fit_transform(expert["desc"])

from sklearn.metrics.pairwise import cosine_similarity
import numpy as np


In [None]:
# Smoke test: classify one expert description against two labels.
# (The same four statements were previously repeated, which ran the
# classifier twice for an identical result.)
text = [expert.iloc[3]['desc']]
label = ["data science", "autre"]
r = cl(text, label)
r



[{'sequence': "javascript;react;vue.js;node.js;full-stack development.Développement d'une plateforme SaaS;Création d'une app mobile hybride",
  'labels': ['autre', 'data science'],
  'scores': [0.868561863899231, 0.13143813610076904]}]

In [None]:
# Classify a sample question against the full list of IT categories.
text = [
    "Comment peut-on intégrer l'intelligence artificielle et la blockchain pour créer un système de vote électronique sécurisé et transparent ?",
]
label = [
    "Développement logiciel",
    "Informatique quantique",
    "Gestion de projet IT",
    "Architecture logicielle",
    "Intelligence artificielle et Machine Learning",
    "Sécurité informatique",
    "Bases de données et Big Data",
    "Cloud Computing",
    "Réseaux et systèmes",
    "Internet des objets (IoT)",
    "Calcul haute performance",
    "Interface homme-machine (IHM)",
    "Blockchain et cryptomonnaies",
    "Réalité virtuelle (VR) et Réalité augmentée (AR)",
    "Bioinformatique",
]
r1 = cl(text, label)
r1

In [None]:
# Question to match against the experts; replaces the earlier value of `text`.
text = ["je ne sais pas comment mettre en pratique devsecops pour un projet sur l'energie"]

In [None]:
# Classify every expert to get the per-category percentages.
def predallexp(exp, label):
    """Run the classifier ``cl`` on the 'desc' text of every row of ``exp``.

    Each description is passed to ``cl`` as a single-element list together
    with ``label``. Returns the list of classifier results, one per row, in
    row order.
    """
    return [cl([description], label) for description in exp['desc']]

In [None]:
# This step takes a very long time: the classifier is run once per expert.
exppred = predallexp(expert, label)


In [None]:
def calculate_cosine_similarity(text1, text2):
    """Return the cosine similarity of two texts under a TF-IDF model.

    A fresh ``TfidfVectorizer`` is fitted on just the two texts, and the
    similarity between their two row vectors is returned as a scalar.
    """
    pair_matrix = TfidfVectorizer().fit_transform([text1, text2])
    first_row = pair_matrix[0:1]
    second_row = pair_matrix[1:2]
    similarity = cosine_similarity(first_row, second_row)
    return similarity[0][0]

In [None]:
# Classify the novice's question, then score every expert against it.
questpred = cl(text, label)
pointrec = []
cosine_scores = []
for i, expert_pred in enumerate(exppred):
    # NOTE(review): Python's built-in eval() only accepts a string, bytes or
    # code object as its first argument, so this call presumably relies on a
    # scoring function named `eval` defined in a cell not shown here --
    # confirm, and consider renaming it so it does not shadow the builtin.
    pointrec.append(eval(expert_pred, questpred))
    cosine_score = calculate_cosine_similarity(text[0], expert.iloc[i]['desc'])
    cosine_scores.append(cosine_score)

# Normalize pointrec and cosine_scores to a 0-25 point scale each.
# A maximum of zero (no signal at all) is left as-is: dividing by it would
# produce NaN, which cannot be converted to int below.
pointrec = np.array(pointrec)
cosine_scores = np.array(cosine_scores)

pointrec_max = pointrec.max()
if pointrec_max != 0:
    pointrec = (pointrec / pointrec_max) * 25  # Scale to 25 points

cosine_max = cosine_scores.max()
if cosine_max != 0:
    cosine_scores = (cosine_scores / cosine_max) * 25  # Scale to 25 points

# Combine scores
total_scores = pointrec + cosine_scores

expert['point'] = total_scores.astype(int)

result = expert.sort_values(by='point', ascending=False)

result.head(10)

In [None]:
!pip install groq

In [None]:
from google.colab import userdata
# Store the secret in a variable. Leaving the call as the cell's last
# expression would echo the key into the notebook output.
groq_api_key = userdata.get('GROQ_API_KEY')

In [None]:
from groq import Groq

import os

# SECURITY: the API key is read from the GROQ_API_KEY environment variable or,
# failing that, from the Colab secret of the same name, instead of being
# embedded in the source. Any key that was ever stored in plain text in this
# file must be considered compromised and revoked.
client = Groq(api_key=os.environ.get("GROQ_API_KEY") or userdata.get('GROQ_API_KEY'))
# Request a streamed chat completion that continues the conversation below.
completion = client.chat.completions.create(
    model="llama3-8b-8192",
    messages=[
        {
            "role": "user",
            "content": "i want to reate a recomendation system\n\n\n\n"
        },
        {
            "role": "assistant",
            # The literal is split with implicit string concatenation: a raw
            # line break inside a single-quoted string is a syntax error.
            "content": (
                "Creating a recommendation system is a fascinating task. Here's a high-level overview of the process, and we can dive deeper into each step as needed:\n\n**Step 1: Define Your Goal**\n\nWhat do you want your recommendation system to achieve? Do you want to:\n\n* Suggest products or services to customers based on their behavior?\n* Offer personalized content to users?\n* Help users discover new items that match their interests?\n\n**Step 2: Gather Data**\n\nYou'll need a dataset to train and test your recommendation system. This could include:\n\n* User-item interactions (e.g., ratings, clicks, purchases)\n* User profiles (e.g., demographics, interests, preferences)\n* Item attributes (e.g., categories, genres, descriptions)\n* Contextual data (e.g., time of day, location, device)\n\n**Step 3: Choose a Recommendation Algorithm**\n\nThere are many algorithms to choose from, each with its strengths and weaknesses. Some popular ones include:\n\n* Collaborative filtering (e.g., user-based, item-based, matrix factorization)\n* Content-based filtering\n* Knowledge-based systems\n* Hybrid approaches\n\n**Step 4: Prepare and Preprocess Data**\n\nYou'll need to clean, transform, and preprocess your data to make it suitable for the recommendation algorithm. This may involve:\n\n* Handling missing values and outliers\n* Normalizing and scaling data\n* Converting categorical variables to numerical representations\n\n**Step 5: Train and Evaluate the Model**\n\nSplit your data into training and testing sets, and train the model on the training data. Use metrics such as precision, recall, and A/B testing to evaluate the model's performance on the testing data.\n\n**Step 6: Deploy and Monitor the System**\n\n Implement the recommendation system in your application, and continually monitor its performance and user feedback. "
                "You may need to retrain the model periodically to adapt to changing user behavior and preferences.\n\n**Additional Considerations**\n\n* **Cold Start Problem**: What to do when there's limited data for a new user or item?\n* **Sparsity**: How to handle sparse data, where many users or items have few interactions?\n* **Scalability**: How to ensure the system can handle large volumes of data and user traffic?\n\nLet's dive deeper into any of these steps or discuss specific requirements you have for your recommendation system."
            )
        }
    ],
    temperature=1,
    max_tokens=1024,
    top_p=1,
    stream=True,
    stop=None,
)
# Print the streamed chunks as they arrive; a chunk's delta content may be
# None, in which case nothing is printed for it.
for chunk in completion:
    print(chunk.choices[0].delta.content or "", end="")
