Linear Programming in Python: Create a Watch List for TED Videos

In [1]:
# matplotlib inline

from pulp import *
import numpy as np
import pandas as pd
import re
import matplotlib.pyplot as plt
from IPython.display import Image

In [2]:
# Download the dataset from https://www.kaggle.com/rounakbanik/ted-talks

# Read the dataset into a pandas dataframe.
# The file is decoded as UTF-8: reading it as ISO-8859-1 turns accented
# characters in speaker names into mojibake (e.g. "BrenÃ©" instead of "Brené").
ted = pd.read_csv('E:/......./ted_main.csv', encoding='utf-8')

# Convert duration from seconds to minutes, rounded to one decimal place
ted['duration'] = (ted['duration'] / 60).round(1)

# Select subset of columns; reset_index() (non-inplace) adds an explicit 'index'
# column that is later used to name the decision variables and to join the
# solver output back onto the talks.
selected_cols = ['name', 'event', 'duration', 'views']
data = ted[selected_cols].reset_index()
data.head()

Unnamed: 0,index,name,event,duration,views
0,0,Ken Robinson: Do schools kill creativity?,TED2006,19.4,47227110
1,1,Al Gore: Averting the climate crisis,TED2006,16.3,3200520
2,2,David Pogue: Simplicity sells,TED2006,21.4,1636292
3,3,Majora Carter: Greening the ghetto,TED2006,18.6,1697550
4,4,Hans Rosling: The best stats you've ever seen,TED2006,19.8,12005869


In [3]:
# Create the LP problem object.
# It is a maximization problem: the objective added to it below is the total
# view count of the talks that get selected.
prob = pulp.LpProblem(name='WatchingTEDTalks', sense=pulp.LpMaximize)

In [4]:
# One decision variable per talk: 1 = watch the talk, 0 = skip it.
# Each variable is named 'x<index>' after the talk's value in data['index'];
# an Integer variable bounded to [0, 1] is effectively binary.
decision_variables = [
    pulp.LpVariable('x' + str(talk_id), lowBound = 0, upBound = 1, cat = 'Integer')
    for talk_id in data['index']
]

print('Total number of decision variables: ' + str(len(decision_variables)))

Total number of decision variables: 2550


In [5]:
# Create optimization function: total views of the selected talks.
# decision_variables was built in the same row order as `data`, so the two are
# paired positionally with zip() instead of scanning every variable for every
# row (which was O(n^2)). lpSum builds the expression in one pass and avoids
# seeding the sum with an empty string.
total_views = pulp.lpSum(
    views * talk
    for views, talk in zip(data['views'].tolist(), decision_variables)
)

# Adding a bare expression (no comparison) to the problem sets it as the objective
prob += total_views
#print('Optimization function: ' + str(total_views))

In [6]:
# Constraints
# Cap on the number of talks, to avoid information overload
total_talks_can_watch = 25
# Time budget: 10 hours, expressed in minutes to match the 'duration' column
total_time_available_for_talks = 60 * 10

In [7]:
# Create Constraint 1 - Time for talks
# Sum of duration (minutes) over the selected talks. Variables and rows share
# the same order, so they are paired positionally; lpSum replaces the former
# O(n^2) nested scan and the string-seeded accumulator.
total_time_talks = pulp.lpSum(
    minutes * talk
    for minutes, talk in zip(data['duration'].tolist(), decision_variables)
)

# NOTE(review): this is an equality, so the selected talks must fill the time
# budget exactly. If the budget is meant as an upper limit, this should be
# `<=` -- confirm the intent before changing, since it alters the solution.
prob += (total_time_talks == total_time_available_for_talks)

In [8]:
# Create Constraint 2 - Number of talks
# Each decision variable is 0 or 1, so their plain sum is the number of talks
# selected. lpSum replaces the former O(n^2) nested scan and the string-seeded
# accumulator.
total_talks = pulp.lpSum(decision_variables)

# NOTE(review): equality forces exactly this many talks; use `<=` if the number
# is meant as a maximum -- confirm the intent before changing.
prob += (total_talks == total_talks_can_watch)

In [9]:
#print(prob)
#prob.writeLP('WatchingTEDTalks.lp')

In [10]:
# Solve the problem with the GLPK command-line solver at the given path
solver = GLPK_CMD(path = 'E:/....../glpsol.exe')
optimization_result = prob.solve(solver)

# Stop here unless the solver reports an optimal solution
assert optimization_result == pulp.LpStatusOptimal

# Report the solver status and the objective value at the optimum
print('Status:', LpStatus[prob.status])
print('Optimal Solution to the problem: ', value(prob.objective))
print('Individual decision variables: ')
# Uncomment to list every decision variable with its solved value:
#for v in prob.variables():
#    print(v.name, '=', v.varValue)

Status: Optimal
Optimal Solution to the problem:  470591400
Individual decision variables: 


In [11]:
# Reorder results: collect each variable's solved value and join it back onto
# the talks so the selected ones can be listed.
solved_variables = prob.variables()
variable_name = [v.name for v in solved_variables]
variable_value = [v.varValue for v in solved_variables]

# Variables are named 'x<index>'; take the first run of digits in each name to
# recover the talk's integer index. The intermediate is deliberately NOT called
# `value`: that name is the function used to print the objective in the solve
# cell, and overwriting it breaks that cell when it is re-run.
talk_index = [int(re.findall(r'(\d+)', name)[0]) for name in variable_name]

# Building the frame from the parsed integers gives an integer 'index' column,
# instead of overwriting an object column one row at a time.
df = pd.DataFrame({'index': talk_index, 'value': variable_value})
df = df.sort_values(by = 'index')

# Join on the talk index, keep only the talks the solver selected (value == 1),
# and list the most-viewed first.
result = pd.merge(data, df, on = 'index')
result = result[result['value'] == 1].sort_values(by = 'views', ascending = False)
selected_cols_final = ['name', 'event', 'duration', 'views']
final_set_of_talks_to_watch = result[selected_cols_final]

In [12]:
from IPython.display import display, HTML

# Render the selected talks as an HTML table, leaving out the dataframe index
watch_list_html = final_set_of_talks_to_watch.to_html(index=False)
display(HTML(watch_list_html))

name,event,duration,views
Ken Robinson: Do schools kill creativity?,TED2006,19.4,47227110
Amy Cuddy: Your body language may shape who yo...,TEDGlobal 2012,21.0,43155405
Simon Sinek: How great leaders inspire action,TEDxPuget Sound,18.1,34309432
BrenÃ© Brown: The power of vulnerability,TEDxHouston,20.3,31168150
Mary Roach: 10 things you didn't know about or...,TED2009,16.7,22270883
Julian Treasure: How to speak so that people w...,TEDGlobal 2013,10.0,21594632
Jill Bolte Taylor: My stroke of insight,TED2008,18.3,21190883
Tony Robbins: Why we do what we do,TED2006,21.8,20685401
James Veitch: This is what happens when you re...,TEDGlobal>Geneva,9.8,20475972
Cameron Russell: Looks aren't everything. Beli...,TEDxMidAtlantic,9.6,19787465
