# AI-Driven Networking & Matchmaking System

In [None]:
import json
import pandas as pd
import numpy as np
import scipy.sparse as sp
from sklearn.preprocessing import MultiLabelBinarizer
from sklearn.metrics.pairwise import cosine_similarity
import networkx as nx


In [None]:
with open('/content/fake_user_profiles.json', 'r') as f:
    users = json.load(f)

In [None]:
df = pd.DataFrame(users)
df.tail()

Unnamed: 0,name,email,industry,skills,project_interests,past_validation_activities,location,experience
95,Donna Hickman,charles68@key.com,Healthcare,"[Cloud Computing, UI/UX Design, Machine Learni...","[Cybersecurity, Web Development, E-commerce]","[Code Review, Peer Feedback]",Mistyfurt,12 years
96,David Barker,barronmario@gmail.com,Healthcare,"[Java, Python, Project Management, Cloud Compu...","[Web Development, E-commerce, Blockchain]","[Product Testing, Code Review]",North Mark,17 years
97,Dana Russell,amy25@combs.info,Healthcare,"[Machine Learning, UI/UX Design]","[E-commerce, AI Research, Cybersecurity]","[Security Audits, Peer Feedback]",East Leahmouth,20 years
98,Samuel Garcia,monicaroberts@hotmail.com,Healthcare,"[UI/UX Design, Machine Learning, Java, Cloud C...","[E-commerce, Blockchain]","[Product Testing, Security Audits]",Smithfort,16 years
99,Bryan Dyer MD,corey53@yahoo.com,Education,"[Python, Cloud Computing]","[E-commerce, Blockchain]","[Product Testing, Peer Feedback]",Kathrynside,2 years


In [None]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 100 entries, 0 to 99
Data columns (total 8 columns):
 #   Column                      Non-Null Count  Dtype 
---  ------                      --------------  ----- 
 0   name                        100 non-null    object
 1   email                       100 non-null    object
 2   industry                    100 non-null    object
 3   skills                      100 non-null    object
 4   project_interests           100 non-null    object
 5   past_validation_activities  100 non-null    object
 6   location                    100 non-null    object
 7   experience                  100 non-null    object
dtypes: object(8)
memory usage: 6.4+ KB


In [None]:
# lists are properly formatted
for col in ['skills', 'project_interests', 'industry', 'past_validation_activities', 'experience']:
    df[col] = df[col].apply(lambda x: x if isinstance(x, list) else [])

In [None]:
# now we are doing feature Engineering
mlb = MultiLabelBinarizer()

In [None]:
# convert lst into binary features
skills_encoded = pd.DataFrame(mlb.fit_transform(df['skills']), columns=mlb.classes_)
interests_encoded = pd.DataFrame(mlb.fit_transform(df['project_interests']), columns=mlb.classes_)
industry_encoded = pd.DataFrame(mlb.fit_transform(df['industry']), columns=mlb.classes_)
past_validation_encoded = pd.DataFrame(mlb.fit_transform(df['past_validation_activities']), columns=mlb.classes_)

In [None]:
# Combine Features
features = pd.concat([skills_encoded, interests_encoded, industry_encoded, past_validation_encoded], axis=1)
features.head()

Unnamed: 0,Cloud Computing,Java,Machine Learning,Project Management,Python,UI/UX Design,AI Research,Blockchain,Cybersecurity,E-commerce,IoT,Web Development,Code Review,Peer Feedback,Product Testing,Security Audits
0,1,0,1,1,1,0,0,0,0,1,0,0,0,1,0,0
1,1,1,0,0,0,1,0,1,0,0,1,0,1,0,0,0
2,1,0,0,1,0,1,0,0,0,0,1,0,0,0,1,1
3,1,1,0,1,0,1,1,0,1,1,0,0,0,1,0,0
4,1,1,0,1,1,0,0,1,1,0,0,0,1,0,0,1


In [None]:
features.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 100 entries, 0 to 99
Data columns (total 16 columns):
 #   Column              Non-Null Count  Dtype
---  ------              --------------  -----
 0   Cloud Computing     100 non-null    int64
 1   Java                100 non-null    int64
 2   Machine Learning    100 non-null    int64
 3   Project Management  100 non-null    int64
 4   Python              100 non-null    int64
 5   UI/UX Design        100 non-null    int64
 6   AI Research         100 non-null    int64
 7   Blockchain          100 non-null    int64
 8   Cybersecurity       100 non-null    int64
 9   E-commerce          100 non-null    int64
 10  IoT                 100 non-null    int64
 11  Web Development     100 non-null    int64
 12  Code Review         100 non-null    int64
 13  Peer Feedback       100 non-null    int64
 14  Product Testing     100 non-null    int64
 15  Security Audits     100 non-null    int64
dtypes: int64(16)
memory usage: 12.6 KB


In [None]:
# similarity matrix
similarity_matrix = cosine_similarity(features)
similarity_matrix

array([[1.        , 0.16666667, 0.33333333, ..., 0.46291005, 0.4330127 ,
        0.66666667],
       [0.16666667, 1.        , 0.5       , ..., 0.15430335, 0.57735027,
        0.33333333],
       [0.33333333, 0.5       , 1.        , ..., 0.3086067 , 0.57735027,
        0.33333333],
       ...,
       [0.46291005, 0.15430335, 0.3086067 , ..., 1.        , 0.53452248,
        0.3086067 ],
       [0.4330127 , 0.57735027, 0.57735027, ..., 0.53452248, 1.        ,
        0.57735027],
       [0.66666667, 0.33333333, 0.33333333, ..., 0.3086067 , 0.57735027,
        1.        ]])

In [None]:
# create graph
G = nx.Graph()

# add users as nodes
for _, row in df.iterrows():
    G.add_node(row['email'], **row)

# ad edges based on shared exp..
for _, row in df.iterrows():
    for exp in row['experience']:
        G.add_edge(row['email'], exp)

# recommendations using pagerank..
recommendations = nx.pagerank(G)
print(recommendations)

{'ericwells@boone.org': 0.010000000000000005, 'leemichael@fletcher-mitchell.com': 0.010000000000000005, 'pmurray@yahoo.com': 0.010000000000000005, 'perezkelsey@yahoo.com': 0.010000000000000005, 'allenisaac@yahoo.com': 0.010000000000000005, 'wlewis@miller.net': 0.010000000000000005, 'jonesteresa@ortiz.com': 0.010000000000000005, 'christopher70@daniel.com': 0.010000000000000005, 'elizabethkelly@yahoo.com': 0.010000000000000005, 'davidcabrera@sanchez-wade.com': 0.010000000000000005, 'timothyyoung@stephens.org': 0.010000000000000005, 'stonekenneth@gmail.com': 0.010000000000000005, 'pedropierce@hotmail.com': 0.010000000000000005, 'carneysean@harris.info': 0.010000000000000005, 'butlerjasmine@chung.info': 0.010000000000000005, 'pamelapace@solis.info': 0.010000000000000005, 'onewman@hernandez-charles.com': 0.010000000000000005, 'roger91@hunter-martinez.org': 0.010000000000000005, 'robert59@hotmail.com': 0.010000000000000005, 'chambersjoseph@terry.info': 0.010000000000000005, 'fordgina@ponce.i

In [None]:
# Function to Recommend Users
def recommend_users(user_email, top_k=5):
  if user_email not in df['email'].values:
    return []
  user_index = df[df['email'] == user_email].index[0]
  similarities = similarity_matrix[user_index]
  top_indices = similarities.argsort()[-top_k-1:-1][::-1]

  return df.iloc[top_indices][['name', 'email', 'skills', 'project_interests']]


user_email = df['email']
recommend_users(user_email, top_k=5)

Unnamed: 0,name,email,skills,project_interests
75,John Carroll,megan39@smith.com,"[Python, Project Management, Machine Learning]",[E-commerce]
87,Chelsea Norris,kenneth80@gmail.com,"[Project Management, Machine Learning]",[E-commerce]
14,Joseph Fox,butlerjasmine@chung.info,"[Python, Machine Learning, Cloud Computing]",[E-commerce]
95,Donna Hickman,charles68@key.com,"[Cloud Computing, UI/UX Design, Machine Learni...","[Cybersecurity, Web Development, E-commerce]"
69,Melissa Howell,pbaker@marshall.com,"[Machine Learning, Project Management, Cloud C...","[AI Research, Cybersecurity, IoT]"
