# In [1]:
# Import necessary libraries
import pandas as pd
from sklearn.preprocessing import StandardScaler
import joblib


def preprocess_influencer_data(data):
    """Standardise the numeric influencer columns.

    A fresh ``StandardScaler`` is fitted on the ``followers``,
    ``engagement_metrics`` and ``follower_growth_rate`` columns, and those
    columns are replaced by their scaled values. All other columns are
    passed through unchanged.

    Args:
        data: DataFrame that contains at least the three numeric columns
            listed above.

    Returns:
        A ``(scaled_data, scaler)`` tuple: a copy of ``data`` with the
        numeric columns standardised, and the fitted scaler (needed later
        to undo the scaling with ``inverse_transform``).
    """
    numerical_features = ['followers', 'engagement_metrics', 'follower_growth_rate']
    scaler = StandardScaler()

    # Scale a copy rather than the argument itself: writing into the
    # caller's frame would silently rescale every other reference to it
    # (and triggers chained-assignment warnings when `data` is a slice).
    scaled_data = data.copy()
    scaled_data[numerical_features] = scaler.fit_transform(
        scaled_data[numerical_features]
    )
    return scaled_data, scaler

# Load the raw influencer table from the CSV file in the working directory.
influencer_data = pd.read_csv('influencer_data.csv')

# Standardise the numeric columns. The fitted scaler is kept because
# get_top_influencers() needs it to convert those columns back to their
# original units via inverse_transform().
influencer_data, scaler = preprocess_influencer_data(influencer_data)

# Persist both artefacts with joblib: the processed (scaled) table and the
# scaler that was fitted on it, so the two can be reloaded together.
joblib.dump(influencer_data, 'influencer_data_processed.pkl')
joblib.dump(scaler, 'influencer_data_scaler.pkl')

def calculate_match_score(row, trend_params):
    """Count how many trend parameters a single row satisfies exactly.

    Args:
        row: Mapping-like record (e.g. a DataFrame row or a dict) that
            supports ``in`` and item lookup by field name.
        trend_params: Mapping of field name to the value that field is
            expected to have.

    Returns:
        The number of entries in ``trend_params`` whose field is present in
        ``row`` and whose value compares equal to the row's value. Fields
        absent from ``row`` are ignored rather than raising.
    """
    return sum(
        1
        for field, wanted in trend_params.items()
        # Membership is tested first so a missing field never gets indexed.
        if field in row and row[field] == wanted
    )

def get_top_influencers(influencer_data, predictions, scaler, top_n=5):
    """Rank influencers against predicted trend parameters and return the best.

    The numeric columns are first converted back to their original units
    with ``scaler.inverse_transform``. Each row then receives a
    ``match_score`` (the number of ``predictions`` entries it matches exactly,
    via ``calculate_match_score``) and a ``final_score``::

        final_score = match_score
                      + 0.70 * impact_score
                      + 0.30 * brand_fit_score

    Args:
        influencer_data: DataFrame of influencers whose ``followers``,
            ``engagement_metrics`` and ``follower_growth_rate`` columns were
            scaled by ``scaler``. It is not modified.
        predictions: Mapping of column name to the desired value, forwarded
            to ``calculate_match_score`` as the trend parameters.
        scaler: Fitted scaler exposing ``inverse_transform`` for the three
            numeric columns.
        top_n: Maximum number of rows to return.

    Returns:
        DataFrame with at most ``top_n`` rows, ordered by descending
        ``final_score`` and restricted to the reporting columns listed in
        ``output_columns`` below.

    Raises:
        KeyError: If ``influencer_data`` lacks any of the columns used for
            scoring or for the output.
    """
    numerical_features = ['followers', 'engagement_metrics', 'follower_growth_rate']

    # Work on a private copy. Inverse-transforming the caller's frame in
    # place would un-scale it a second time on the next call (corrupting the
    # numeric columns) and would leak the helper score columns into it.
    scored = influencer_data.copy()
    scored[numerical_features] = scaler.inverse_transform(scored[numerical_features])

    # Matching runs after the inverse transform, so numeric trend parameters
    # are compared against values in their original units.
    scored['match_score'] = scored.apply(
        lambda row: calculate_match_score(row, predictions), axis=1
    )

    score_weights = {
        'impact_score': 0.70,
        'brand_fit_score': 0.30,
    }

    scored['final_score'] = (
        scored['match_score']
        + score_weights['impact_score'] * scored['impact_score']
        + score_weights['brand_fit_score'] * scored['brand_fit_score']
    )

    top_influencers = scored.sort_values(by='final_score', ascending=False).head(top_n)

    # 'match_score' is deliberately left out: only 'final_score' is reported.
    output_columns = ['influencer_id', 'gender', 'platform', 'link', 'followers', 'public_perception',
                      'engagement_metrics', 'follower_locations', 'follower_growth_rate', 'style',
                      'past_collab', 'age_range', 'impact_score', 'brand_fit_score', 'final_score']

    return top_influencers[output_columns]

# Save recommendation function.
# NOTE(review): pickle-based serialisers store a plain function by reference
# (its module and qualified name), not its code. Loading this file therefore
# presumably requires get_top_influencers (and calculate_match_score, which
# it calls) to be defined under the same names in the loading process —
# confirm that the consumer does so.
joblib.dump(get_top_influencers, 'influencer_recommender_function.pkl')

# Output: ['influencer_recommender_function.pkl']