# LightFM model for influence marketing

Description

### Model theoretical explanation
This model is based in ...

### 1. Import Libraries

In [230]:
# Install all the libraries in requirements.txt
import sys
import os

import itertools
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import scrapbook as sb
import requests
import io

import lightfm
from lightfm import LightFM
from lightfm.data import Dataset
from lightfm import cross_validation

# Import LightFM's evaluation metrics
from lightfm.evaluation import precision_at_k as lightfm_prec_at_k
from lightfm.evaluation import recall_at_k as lightfm_recall_at_k

# Import repo's evaluation metrics
from recommenders.evaluation.python_evaluation import precision_at_k, recall_at_k

from recommenders.utils.timer import Timer
from recommenders.datasets import movielens
from recommenders.models.lightfm.lightfm_utils import (
    track_model_metrics, prepare_test_df, prepare_all_predictions,
    compare_metric, similar_users, similar_items)

print("System version: {}".format(sys.version))
print("LightFM version: {}".format(lightfm.__version__))

System version: 3.8.7 (tags/v3.8.7:6503f05, Dec 21 2020, 17:59:51) [MSC v.1928 64 bit (AMD64)]
LightFM version: 1.16


### 2. Defining Variables

In [231]:
# Select MovieLens data size
MOVIELENS_DATA_SIZE = '100k'

# default number of recommendations
K = 10
# percentage of data used for testing
TEST_PERCENTAGE = 0.25
# model learning rate
LEARNING_RATE = 0.25
# no of latent factors
NO_COMPONENTS = 20
# no of epochs to fit model
NO_EPOCHS = 20
# no of threads to fit model
NO_THREADS = 32
# regularisation for both user and item features
ITEM_ALPHA = 1e-6
USER_ALPHA = 1e-6

# seed for pseudonumber generations
SEED = 42

### 3. Retrive Data

In [232]:
df_co = pd.read_csv('df_co.csv')
df_co

Unnamed: 0,Name_co,Category_co,Hashtags_co,Country_co,Followers_co
0,Brewed Awakening,Photography,#goodnight#realestate#sky,DR Congo,502645
1,The Cottage Cafe,Cinema,#flex#foodinspo#makeup,Ethiopia,193415
2,Aroma Cafe,Photography,#fitness#football#couplesgoals,Philippines,394953
3,The Kitchen Table,Business,#quote#girl,Egypt,286224
4,The Bluebird Cafe,Kids,#coffee#foodforfoodies,Turkey,364079
...,...,...,...,...,...
95,The Greenhouse,Sports with a ball,#nature#follow#beach,Spain,483699
96,The Chopped Leaf,Fashion,#blessings#fitness#instaphoto#training#happyme,Germany,161492
97,The Baking Company,Kids,#surfing#foodstylist#boxing#holiday,Turkey,446105
98,Gourmet Kitchen,Kids,#trendy#food#naturelovers#foodbloggersofinstagram,Myanmar,34133


In [233]:
df_influ = pd.read_csv('df_inf.csv')
df_influ

Unnamed: 0,Account,Link,Followers,Audience Country,Authentic engagement,Engagement avg,Category1,Hashtags,Cost Story,Cost Post
0,cristiano,https://www.instagram.com/cristiano/,400100000.0,India,7800000.0,9500000.0,Sports with a ball,#foodlover#training#motivational#foodheaven#ba...,468000.0,1092000.0
1,kyliejenner,https://www.instagram.com/kyliejenner/,308800000.0,United States,6200000.0,10100000.0,Fashion,#fun#follow#beautyblogger#happiness#landscapep...,372000.0,868000.0
2,leomessi,https://www.instagram.com/leomessi/,306300000.0,Argentina,4800000.0,6500000.0,Sports with a ball,#weekend,288000.0,672000.0
3,kendalljenner,https://www.instagram.com/kendalljenner/,217800000.0,United States,3400000.0,5400000.0,Modeling,#cardio#money#instagood#style#entrepreneur,204000.0,476000.0
4,selenagomez,https://www.instagram.com/selenagomez/,295800000.0,United States,2700000.0,3600000.0,Music,#instatravel#fashionista#loveit#shopping#foodporn,162000.0,378000.0
...,...,...,...,...,...,...,...,...,...,...
995,senoritasaeva,https://www.instagram.com/senoritasaeva/,7700000.0,Russia,246600.0,318200.0,Lifestyle,#fitnessmotivation#wanderlust#doglover#girl#ph...,14796.0,34524.0
996,manuelneuer,https://www.instagram.com/manuelneuer/,11500000.0,Germany,146500.0,210200.0,Sports with a ball,#smiles#inspiration#education#blackandwhite#oo...,8790.0,20510.0
997,sahilkhan,https://www.instagram.com/sahilkhan/,10100000.0,India,176500.0,239800.0,Fitness,#follow#fitness#blackandwhite#friendshipgoals#...,10590.0,24710.0
998,mohanshakti,https://www.instagram.com/mohanshakti/,13700000.0,India,146400.0,175500.0,Art,#sunset#foodlife#trailrunning#trendy#beauty#in...,8784.0,20496.0


### 4. Prepare Data

In [234]:
df2_co = pd.concat([df_co]*1000, ignore_index=True)

In [235]:
df2_influ = pd.DataFrame(np.repeat(df_influ.values, 100, axis=0), columns=["Account", "Link", "Followers", "Audience Country", "Authentic engagement", "Engagement avg", "Category1", "Hashtags", "Cost Story", "Cost Post"])


In [236]:
df = pd.concat([df2_influ, df2_co], axis=1)

In [237]:
def Convert(string):
    li = list(string.split("#"))
    return li

In [238]:
df['Hashtags'] = df['Hashtags'].astype(str)

In [239]:
h = []
h_co = []

for i in range(len(df['Hashtags'])):
    h.append(Convert(df['Hashtags'][i]))
    h_co.append(Convert(df['Hashtags_co'][i]))

df['Hashtags'] = h
df['Hashtags_co'] = h_co

In [240]:
for i in range(len(df['Hashtags'])):
    df['Hashtags'][i].pop(0)
    df['Hashtags_co'][i].pop(0)

In [241]:
df

Unnamed: 0,Account,Link,Followers,Audience Country,Authentic engagement,Engagement avg,Category1,Hashtags,Cost Story,Cost Post,Name_co,Category_co,Hashtags_co,Country_co,Followers_co
0,cristiano,https://www.instagram.com/cristiano/,400100000.0,India,7800000.0,9500000.0,Sports with a ball,"[foodlover, training, motivational, foodheaven...",468000.0,1092000.0,Brewed Awakening,Photography,"[goodnight, realestate, sky]",DR Congo,502645
1,cristiano,https://www.instagram.com/cristiano/,400100000.0,India,7800000.0,9500000.0,Sports with a ball,"[foodlover, training, motivational, foodheaven...",468000.0,1092000.0,The Cottage Cafe,Cinema,"[flex, foodinspo, makeup]",Ethiopia,193415
2,cristiano,https://www.instagram.com/cristiano/,400100000.0,India,7800000.0,9500000.0,Sports with a ball,"[foodlover, training, motivational, foodheaven...",468000.0,1092000.0,Aroma Cafe,Photography,"[fitness, football, couplesgoals]",Philippines,394953
3,cristiano,https://www.instagram.com/cristiano/,400100000.0,India,7800000.0,9500000.0,Sports with a ball,"[foodlover, training, motivational, foodheaven...",468000.0,1092000.0,The Kitchen Table,Business,"[quote, girl]",Egypt,286224
4,cristiano,https://www.instagram.com/cristiano/,400100000.0,India,7800000.0,9500000.0,Sports with a ball,"[foodlover, training, motivational, foodheaven...",468000.0,1092000.0,The Bluebird Cafe,Kids,"[coffee, foodforfoodies]",Turkey,364079
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
99995,eduincaz,https://www.instagram.com/eduincaz/,6200000.0,Mexico,305600.0,391900.0,Lifestyle,"[holidayvibes, doglovers]",18336.0,42784.0,The Greenhouse,Sports with a ball,"[nature, follow, beach]",Spain,483699
99996,eduincaz,https://www.instagram.com/eduincaz/,6200000.0,Mexico,305600.0,391900.0,Lifestyle,"[holidayvibes, doglovers]",18336.0,42784.0,The Chopped Leaf,Fashion,"[blessings, fitness, instaphoto, training, hap...",Germany,161492
99997,eduincaz,https://www.instagram.com/eduincaz/,6200000.0,Mexico,305600.0,391900.0,Lifestyle,"[holidayvibes, doglovers]",18336.0,42784.0,The Baking Company,Kids,"[surfing, foodstylist, boxing, holiday]",Turkey,446105
99998,eduincaz,https://www.instagram.com/eduincaz/,6200000.0,Mexico,305600.0,391900.0,Lifestyle,"[holidayvibes, doglovers]",18336.0,42784.0,Gourmet Kitchen,Kids,"[trendy, food, naturelovers, foodbloggersofins...",Myanmar,34133


In [242]:
num_coincidente = df.apply(lambda row: len(set(row['Hashtags']).intersection(set(row['Hashtags_co']))), axis=1)
points_cat = np.where((df['Category1'] == df['Category_co']), 10, 0)
points_country = np.where((df['Audience Country'] == df['Country_co']), 5, 0)
points_eng = df['Authentic engagement']/df['Followers']
df['num_coincidentes'] = num_coincidente
df['points_eng'] = points_eng
df['points_cat'] = points_cat
df['points_country'] = points_country
df['Puntos'] = df['points_cat'] + df['points_country'] + df["num_coincidentes"] + df['points_eng']

df = df.drop(['points_cat', 'points_country', 'num_coincidentes', 'Engagement avg', 'points_eng'], axis=1)


In [243]:
df.dtypes

Account                 object
Link                    object
Followers               object
Audience Country        object
Authentic engagement    object
Category1               object
Hashtags                object
Cost Story              object
Cost Post               object
Name_co                 object
Category_co             object
Hashtags_co             object
Country_co              object
Followers_co             int64
Puntos                  object
dtype: object

In [244]:
df['Followers'] = df['Followers'].astype(int)
df['Authentic engagement'] = df['Authentic engagement'].astype(float)
df['Cost Story'] = df['Cost Story'].astype(float)
df['Followers_co'] = df['Followers_co'].astype(int)
df['Puntos'] = df['Puntos'].astype(float)
df.dtypes

Account                  object
Link                     object
Followers                 int32
Audience Country         object
Authentic engagement    float64
Category1                object
Hashtags                 object
Cost Story              float64
Cost Post                object
Name_co                  object
Category_co              object
Hashtags_co              object
Country_co               object
Followers_co              int32
Puntos                  float64
dtype: object

In [245]:
df

Unnamed: 0,Account,Link,Followers,Audience Country,Authentic engagement,Category1,Hashtags,Cost Story,Cost Post,Name_co,Category_co,Hashtags_co,Country_co,Followers_co,Puntos
0,cristiano,https://www.instagram.com/cristiano/,400100000,India,7800000.0,Sports with a ball,"[foodlover, training, motivational, foodheaven...",468000.0,1092000.0,Brewed Awakening,Photography,"[goodnight, realestate, sky]",DR Congo,502645,0.019495
1,cristiano,https://www.instagram.com/cristiano/,400100000,India,7800000.0,Sports with a ball,"[foodlover, training, motivational, foodheaven...",468000.0,1092000.0,The Cottage Cafe,Cinema,"[flex, foodinspo, makeup]",Ethiopia,193415,0.019495
2,cristiano,https://www.instagram.com/cristiano/,400100000,India,7800000.0,Sports with a ball,"[foodlover, training, motivational, foodheaven...",468000.0,1092000.0,Aroma Cafe,Photography,"[fitness, football, couplesgoals]",Philippines,394953,0.019495
3,cristiano,https://www.instagram.com/cristiano/,400100000,India,7800000.0,Sports with a ball,"[foodlover, training, motivational, foodheaven...",468000.0,1092000.0,The Kitchen Table,Business,"[quote, girl]",Egypt,286224,0.019495
4,cristiano,https://www.instagram.com/cristiano/,400100000,India,7800000.0,Sports with a ball,"[foodlover, training, motivational, foodheaven...",468000.0,1092000.0,The Bluebird Cafe,Kids,"[coffee, foodforfoodies]",Turkey,364079,0.019495
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
99995,eduincaz,https://www.instagram.com/eduincaz/,6200000,Mexico,305600.0,Lifestyle,"[holidayvibes, doglovers]",18336.0,42784.0,The Greenhouse,Sports with a ball,"[nature, follow, beach]",Spain,483699,0.049290
99996,eduincaz,https://www.instagram.com/eduincaz/,6200000,Mexico,305600.0,Lifestyle,"[holidayvibes, doglovers]",18336.0,42784.0,The Chopped Leaf,Fashion,"[blessings, fitness, instaphoto, training, hap...",Germany,161492,0.049290
99997,eduincaz,https://www.instagram.com/eduincaz/,6200000,Mexico,305600.0,Lifestyle,"[holidayvibes, doglovers]",18336.0,42784.0,The Baking Company,Kids,"[surfing, foodstylist, boxing, holiday]",Turkey,446105,0.049290
99998,eduincaz,https://www.instagram.com/eduincaz/,6200000,Mexico,305600.0,Lifestyle,"[holidayvibes, doglovers]",18336.0,42784.0,Gourmet Kitchen,Kids,"[trendy, food, naturelovers, foodbloggersofins...",Myanmar,34133,0.049290
