In [None]:
import pandas as pd

In [None]:
# Load the ranked-resume table. Later cells look up the 'ID', 'Managerial_Score'
# and 'Years_of_Experience' columns in it when weighting the graph edges.
results_df = pd.read_csv('final_ranked_resumes.csv')
results_df.head()

In [None]:
# Extract Final_Recommendation_Letters.zip; the next cell walks the extracted folder and writes a CSV with two columns: the directory numbers and the numbers of the entries inside each directory.

import os
import csv
import re

import zipfile

# Unpack the recommendation-letter archive into a folder next to the notebook.
archive_path = 'Final_Recommendation_Letters.zip'
extract_dir = './Final_Recommendation_Letters'
with zipfile.ZipFile(archive_path, mode='r') as zip_ref:
    zip_ref.extractall(path=extract_dir)

In [None]:
# Build dir_subdir_new.csv: one row per (directory number, entry number) pair
# found under the extracted Final_Recommendation_Letters folder.


def _first_number(name):
    """Return the first run of digits in `name`, or None when it contains none."""
    match = re.search(r'\d+', name)
    return match.group(0) if match else None


# get the absolute path of the extracted folder
cwd = os.path.abspath("./Final_Recommendation_Letters/")

# newline='' is what the csv module expects; without it rows come out
# double-spaced on platforms that translate '\n' on write.
with open('dir_subdir_new.csv', 'w', newline='') as f:
    writer = csv.writer(f)
    writer.writerow(['dir', 'subdir'])
    # sorted() makes the row order reproducible; os.listdir order is arbitrary.
    for dir_name in sorted(os.listdir(cwd)):
        dir_path = os.path.join(cwd, dir_name)
        dir_number = _first_number(dir_name)
        # Skip stray files and un-numbered folders (e.g. archive metadata)
        # instead of crashing on them.
        if dir_number is None or not os.path.isdir(dir_path):
            continue
        for entry_name in sorted(os.listdir(dir_path)):
            entry_number = _first_number(entry_name)
            if entry_number is None:
                continue
            writer.writerow([dir_number, entry_number])

print('done')

In [None]:
# Load the edge list written by the previous cell: column 'dir' holds the number
# parsed from each directory name and 'subdir' the number parsed from each entry
# found inside that directory.
import pandas as pd
df = pd.read_csv('dir_subdir_new.csv')
df.head()

In [None]:
import networkx as nx
# `plt` must name matplotlib.pyplot (figure/title/show live there), not the
# top-level matplotlib package.
import matplotlib.pyplot as plt

# Build a directed graph with one edge per CSV row, pointing from the 'dir'
# node to the 'subdir' node, and draw it with node labels.
G = nx.from_pandas_edgelist(df, 'dir', 'subdir', create_using=nx.DiGraph())
nx.draw(G, with_labels=True)

In [None]:

# Weight every edge (u, v) by the profile of its source node u (the recommender):
#   weight = 0.5 * Years_of_Experience + 5 * Managerial_Score - 0.1 * out_degree(u)
EXPERIENCE_COEF = 0.5
MANAGERIAL_COEF = 5
OUT_DEGREE_PENALTY = 0.1
DEFAULT_EDGE_WEIGHT = 0  # used when the recommender has no row in results_df

# Index both attributes by ID once instead of filtering results_df twice per edge.
# keep='first' selects the same row the per-edge `.values[0]` lookup would pick
# when an ID occurs more than once.
unique_profiles = results_df.drop_duplicates(subset='ID', keep='first').set_index('ID')
managerial_score_by_id = unique_profiles['Managerial_Score'].to_dict()
experience_by_id = unique_profiles['Years_of_Experience'].to_dict()

edge_weights = {}
for u, v in G.edges():
    recommender_id = int(u)
    if recommender_id not in managerial_score_by_id:
        # Only the recommender is looked up, so only it can be missing.
        print(f"Warning: recommender ID {u} not found in results_df. "
              f"Assigning default weight {DEFAULT_EDGE_WEIGHT} to edge ({u}, {v}).")
        edge_weights[(u, v)] = DEFAULT_EDGE_WEIGHT
        continue

    recommender_managerial_score = managerial_score_by_id[recommender_id]
    recommender_years_of_experience = experience_by_id[recommender_id]
    # Number of edges leaving the recommender node
    out_degree = G.out_degree(u)

    edge_weights[(u, v)] = (
        EXPERIENCE_COEF * recommender_years_of_experience
        + MANAGERIAL_COEF * recommender_managerial_score
        - OUT_DEGREE_PENALTY * out_degree
    )

# Store the weights on the graph under the 'weight' edge attribute
nx.set_edge_attributes(G, edge_weights, 'weight')

# Optional: print the weighted edges
for u, v, data in G.edges(data=True):
    print(f"Edge ({u}, {v}): Weight = {data['weight']}")



In [None]:
# Draw a small slice of the graph, with edge thickness proportional to edge weight.

import matplotlib.pyplot as plt

# Keep only the first `num_nodes_to_show` nodes (in graph node order) so the plot stays readable.
num_nodes_to_show = 50
subgraph_nodes = list(G.nodes())[:num_nodes_to_show]
subgraph = G.subgraph(subgraph_nodes)

# Get edge weights for the subgraph, in the order the edges are drawn
edge_widths = [subgraph[u][v]['weight'] for u, v in subgraph.edges()]

# Scale so the heaviest edge is MAX_EDGE_WIDTH wide. The maximum is computed once
# (not once per edge). When no edge has a positive weight, dividing by the maximum
# would fail or flip signs, so every edge falls back to a uniform width of 1.
# Negative weights are clamped to 0 because a line width cannot be negative.
MAX_EDGE_WIDTH = 5
heaviest_weight = max(edge_widths, default=0)
if heaviest_weight > 0:
    normalized_edge_widths = [max(w, 0) / heaviest_weight * MAX_EDGE_WIDTH for w in edge_widths]
else:
    normalized_edge_widths = [1 for w in edge_widths]

# Draw the subgraph with edge thickness based on weights; the seed fixes the layout
plt.figure(figsize=(12, 8))
pos = nx.spring_layout(subgraph, seed=42)
nx.draw(subgraph, pos, with_labels=True, node_size=500, node_color='skyblue', font_size=10,
        edge_color='gray', width=normalized_edge_widths)
plt.title('Subgraph with Weighted Edges (Edge Thickness Represents Weight)')
plt.show()


In [None]:
# Load the persons/recommenders table keyed by its 'ID' column, so the graph
# metrics computed below can be attached by index label.
# Note: this rebinds `df`, which held the dir/subdir edge list until now.
df = pd.read_csv('Final_Persons_And_Recommenders.csv').set_index('ID')
df.head()

In [None]:
# Compute the PageRank score of every node (damping factor alpha=0.9).
# NOTE(review): nx.pagerank reads the 'weight' edge attribute by default, so the
# edge weights assigned earlier presumably feed into these scores — confirm that
# is intended.
pr = nx.pagerank(G, alpha=0.9)
# Attach each score by index label; index values with no matching graph node get NaN.
df['PageRank'] = df.index.map(pr)
df.head()

In [None]:
# Betweenness centrality of every node in the graph
bc = nx.betweenness_centrality(G)
# The raw 'Betweenness' column is only a stepping stone: attach it by index label,
# derive Inverse_Betweenness = 1 / (Betweenness + 10), then drop the raw column.
df = (
    df.assign(Betweenness=df.index.map(bc))
      .assign(Inverse_Betweenness=lambda frame: 1 / (frame['Betweenness'] + 10))
      .drop(columns='Betweenness')
)
df.head()

In [None]:
# In-degree: how many edges point at each node
in_deg = G.in_degree()

# Materialise the degree view as a plain {node: in_degree} mapping
in_deg_dict = {node: degree for node, degree in in_deg}

# Attach the in-degree to the DataFrame by index label and preview the result
df['InDegree'] = df.index.map(in_deg_dict)
df.head()


In [None]:
# # find number of ingoing edges for all nodes
# in_deg = G.in_degree()
# # add the number of ingoing edges to the dataframe
# df['InDegree'] = df.index.map(in_deg)
# df.head()

In [None]:
# prompt: For each node take the sum of the incoming edge weights and store it in df after normalizing

import pandas as pd

# Sum the weights of the edges pointing at each node in a single pass over the
# edge list (rescanning every edge for every node is quadratic in graph size).
# Nodes without incoming edges keep a total of 0.
incoming_edge_weights = dict.fromkeys(G.nodes(), 0)
for source, target, data in G.edges(data=True):
    incoming_edge_weights[target] += data['weight']

# Min-max normalise the totals to [0, 1]. `default=0` keeps an empty graph from
# raising; when all totals are equal there is no spread, so everything maps to 0.
max_weight = max(incoming_edge_weights.values(), default=0)
min_weight = min(incoming_edge_weights.values(), default=0)
if max_weight != min_weight:
    normalized_incoming_edge_weights = {
        node: (weight - min_weight) / (max_weight - min_weight)
        for node, weight in incoming_edge_weights.items()
    }
else:
    normalized_incoming_edge_weights = dict.fromkeys(incoming_edge_weights, 0)

# Add the normalized incoming edge weights to the DataFrame by index label
df['IncomingEdgeWeightSum'] = df.index.map(normalized_incoming_edge_weights)
df.head()



In [None]:
# Flag every person who takes part in a reciprocated edge (i -> j and j -> i both
# exist). A self-loop also satisfies this check.
reciprocal_nodes = set()
for i, j in G.edges():
    if G.has_edge(j, i):
        reciprocal_nodes.update((i, j))

# Assign through the index rather than `df.loc[node, 'Reciprocity'] = 1`:
# label-based setting appends a new, otherwise empty row for any graph node
# that has no row in df, silently growing the table.
df['Reciprocity'] = df.index.isin(list(reciprocal_nodes)).astype('int64')
df.head()

In [None]:
# Create a composite CreditScore from the graph metrics computed above.


def _min_max_scale(series, upper=100):
    """Linearly rescale `series` onto [0, upper].

    A series with no spread (all values equal, or all missing) would divide by
    zero and turn the whole column into NaN; it is mapped to 0 instead, leaving
    missing values missing.
    """
    low = series.min()
    span = series.max() - low
    if not span > 0:
        return series.where(series.isna(), 0.0)
    return upper * (series - low) / span


# min max scaling of the two centrality inputs to 0-100
df['PageRank'] = _min_max_scale(df['PageRank'])
df['Inverse_Betweenness'] = _min_max_scale(df['Inverse_Betweenness'])

# Weighted blend of the centralities, divided by (InDegree + 1), scaled by the
# normalised incoming weight, minus a fixed penalty for reciprocated links.
df['CreditScore'] = (
    df['IncomingEdgeWeightSum']
    * ((0.4 * df['PageRank'] + 0.3 * df['Inverse_Betweenness']) / (df['InDegree'] + 1))
    - 0.3 * df['Reciprocity']
)
df['CreditScore'] = _min_max_scale(df['CreditScore'])
df.head()

In [None]:
# Save the scored table, including its 'ID' index, to a CSV file.
df.to_csv('Final_Credit_Score.csv')

In [None]:
# Rename 'dir' to 'cv_id' and 'subdir' to 'rec_id'.
# NOTE(review): `df` was rebound to the Final_Persons_And_Recommenders table in an
# earlier cell; the 'dir'/'subdir' columns belonged to the edge list it replaced.
# Unless that table also has such columns, this rename changes nothing (rename
# ignores absent labels by default). It also runs after the CSV export above.
# Confirm which frame this cell was meant for.
df.rename(columns={'dir': 'cv_id', 'subdir': 'rec_id'}, inplace=True)
df.head()

In [None]:
import matplotlib.pyplot as plt

# Collect every edge whose reverse edge also exists. Both directions of a
# reciprocated pair end up in the list.
reciprocal_edges = [(u, v) for u, v in G.edges() if G.has_edge(v, u)]

# Every endpoint of a reciprocal edge, de-duplicated in first-seen order
highlighted_nodes = list(dict.fromkeys(node for edge in reciprocal_edges for node in edge))

# Visualize the graph with reciprocal connections shown in red. The layout is
# seeded (same seed as the weighted-subgraph plot) so re-runs give the same picture.
pos = nx.spring_layout(G, seed=42)
nx.draw_networkx_nodes(G, pos, node_size=100)
# nx.draw_networkx_labels(G, pos)
nx.draw_networkx_edges(G, pos)
nx.draw_networkx_edges(G, pos, edgelist=reciprocal_edges, edge_color='r', arrows=True)
# Also paint the nodes at either end of a reciprocal edge red
nx.draw_networkx_nodes(G, pos, nodelist=highlighted_nodes, node_color='r', node_size=100)
plt.show()



In [None]:
# Share of edges that are reciprocated. `reciprocal_percentage` holds the
# fraction (0-1); the cell displays it as a percentage.
# A graph without edges reports 0 instead of raising ZeroDivisionError.
total_edges = len(G.edges())
reciprocal_percentage = len(reciprocal_edges) / total_edges if total_edges else 0.0
reciprocal_percentage * 100