<a href="https://colab.research.google.com/github/Dorcy64/video-recommender-algorithm/blob/main/video_recommender_algorithm.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [25]:
import numpy
# Seed NumPy's global random generator so that "Restart & Run All" rebuilds
# the exact same synthetic data set on every run.
RANDOM_SEED = 42
numpy.random.seed(RANDOM_SEED)

# Like matrix: 100 viewers (rows) x 5 channels (columns), all False/0 to start
X = numpy.zeros((100, 5),
                dtype="bool")

# Channel names for building the data set; position in the list is the
# channel's column index in X
features = ["Investing101",
            "FunnyCatVideos",
            "BuraaTech",
            "MammothInteractive",
            "VideoGameReviewer"]

# Column index of each channel. The names (including their spelling) are kept
# unchanged because the cells below reference them.
INVESTIONG101_INDEX = 0
FUNNYCATVIDEOS_INDEX = 1
BURATECH_INDEX = 2
MAMMOTHINTERACTIVE_INDEX = 3
VIDEOGAMEREVIEWER_INDEX = 4

VIEWER_COUNT = X.shape[0]

LIKED_VIDEO = 1

# Build the likes of every viewer. Each numpy.random.random() draw is compared
# against a probability; the like probabilities of the remaining channels
# depend on which "anchor" channel (Investing101 / FunnyCatVideos / neither)
# the viewer liked first.
for viewer in range(VIEWER_COUNT):

  if numpy.random.random() < 0.3:
    # Viewer likes Investing101
    X[viewer, INVESTIONG101_INDEX] = LIKED_VIDEO

    if numpy.random.random() < 0.5:
      X[viewer, FUNNYCATVIDEOS_INDEX] = LIKED_VIDEO

    if numpy.random.random() < 0.2:
      X[viewer, BURATECH_INDEX] = LIKED_VIDEO

    if numpy.random.random() < 0.3:
      X[viewer, MAMMOTHINTERACTIVE_INDEX] = LIKED_VIDEO

    if numpy.random.random() < 0.4:
      X[viewer, VIDEOGAMEREVIEWER_INDEX] = LIKED_VIDEO
  else:
    if numpy.random.random() < 0.5:
      # Viewer does not like Investing101 but likes FunnyCatVideos
      X[viewer, FUNNYCATVIDEOS_INDEX] = LIKED_VIDEO

      if numpy.random.random() < 0.3:
        X[viewer, BURATECH_INDEX] = LIKED_VIDEO

      if numpy.random.random() < 0.2:
        X[viewer, MAMMOTHINTERACTIVE_INDEX] = LIKED_VIDEO

      if numpy.random.random() < 0.4:
        X[viewer, VIDEOGAMEREVIEWER_INDEX] = LIKED_VIDEO
    else:
      # Viewer likes neither Investing101 nor FunnyCatVideos
      if numpy.random.random() < 0.7:
        X[viewer, BURATECH_INDEX] = LIKED_VIDEO

      if numpy.random.random() < 0.55:
        X[viewer, MAMMOTHINTERACTIVE_INDEX] = LIKED_VIDEO

      if numpy.random.random() < 0.8:
        X[viewer, VIDEOGAMEREVIEWER_INDEX] = LIKED_VIDEO

  # Every viewer must like at least one channel: fall back to
  # VideoGameReviewer when none of the draws above produced a like
  if X[viewer].sum() == 0:
    X[viewer, VIDEOGAMEREVIEWER_INDEX] = LIKED_VIDEO

# Save the created data set as 0/1 integers, one viewer per line
numpy.savetxt("viewer_profiles.txt", X, fmt="%d")


In [26]:
# Read the saved data set back from disk and print its shape as a quick
# check that the file was created
X = numpy.loadtxt(fname="viewer_profiles.txt")
print(X.shape)

(100, 5)


In [27]:
# Count how many viewers like each channel.
# Every lookup uses the *_INDEX column constants; the previous bare names
# MAMMOTHINTERACTIVE / VIDEOGAMEREVIEWER were never defined and raised a
# NameError on a fresh kernel.
number_of_mammothinteractive_likes = 0
number_of_funnycatvideos_likes = 0
number_of_buratech_likes = 0
number_of_investing101_likes = 0
number_of_videogamereviewer_likes = 0

for viewer in X:
  if viewer[MAMMOTHINTERACTIVE_INDEX] == 1:
    number_of_mammothinteractive_likes += 1

  if viewer[FUNNYCATVIDEOS_INDEX] == 1:
    number_of_funnycatvideos_likes += 1

  if viewer[BURATECH_INDEX] == 1:
    number_of_buratech_likes += 1

  if viewer[INVESTIONG101_INDEX] == 1:
    number_of_investing101_likes += 1

  if viewer[VIDEOGAMEREVIEWER_INDEX] == 1:
    number_of_videogamereviewer_likes += 1

# One line per channel, in the same order as the arguments below
print(
    "Mammoth Interactive: {0} likes\n"
    "Funny Cat Videos: {1} likes\n"
    "Bura Tech: {2} likes\n"
    "Investing 101: {3} likes\n"
    "VideoGameReviewer: {4} likes".format(
        number_of_mammothinteractive_likes,
        number_of_funnycatvideos_likes,
        number_of_buratech_likes,
        number_of_investing101_likes,
        number_of_videogamereviewer_likes,
    )
)

Mammoth Interactive: 24 likes
Funny Cat Videos: 58 likes
Bura Tech: 41 likes
Investing 101: 22 likes
VideoGameReviewer: 49 likes


In [28]:
# Tally, over all viewers who liked MammothInteractive, how often they also
# liked VideoGameReviewer (rule valid) versus not (rule invalid)
rule_valid = rule_invalid = 0
for viewer in X:
  # The rule only applies to viewers who liked the premise channel
  if viewer[MAMMOTHINTERACTIVE_INDEX] != 1:
    continue

  if viewer[VIDEOGAMEREVIEWER_INDEX] == 1:
    rule_valid += 1
  else:
    rule_invalid += 1

# Print the rule and its tallies
print("If the viewer liked mamoth interactive they will like video game reviewer")
print(f"{rule_valid} times the rule was valid")
print(f"{rule_invalid} times the rule was invalid")


If the viewer liked mamoth interactive they will like video game reviewer
12 times the rule was valid
12 times the rule was invalid


In [29]:
# Support: how many viewers satisfied the rule.
# Confidence: that count as a fraction of all viewers who liked the premise
# channel (MammothInteractive).
support = rule_valid
confidence = rule_valid / number_of_mammothinteractive_likes

print(f"Support: {support}")

# A high confidence suggests the two channels are correlated, in which case
# the conclusion channel is worth recommending
print(f"Confidence: {100 * confidence:.1f}%")

Support: 12
Confidence: 50.0%


In [36]:
from collections import defaultdict

number_of_samples, number_of_features = X.shape

# Per-premise and per-rule tallies; a rule is keyed by (premise, conclusion)
number_of_occurences = defaultdict(int)
times_rule_valid = defaultdict(int)
times_rule_invalid = defaultdict(int)

for viewer in X:
  for premise in range(number_of_features):
    # Only channels the viewer liked can act as a premise
    if viewer[premise] == 0:
      continue
    number_of_occurences[premise] += 1

    for conclusions in range(number_of_features):
      # A channel is never paired with itself
      if premise == conclusions:
        continue

      # Liked both channels -> the rule held; otherwise it failed
      rule = (premise, conclusions)
      tally = times_rule_valid if viewer[conclusions] == 1 else times_rule_invalid
      tally[rule] += 1

support = times_rule_valid
confidence = defaultdict(int)

# Confidence of a rule = times it held / times its premise channel was liked
for rule, valid_count in times_rule_valid.items():
  confidence[rule] = valid_count / number_of_occurences[rule[0]]

# Report every rule with its confidence and support
for (premise, conclusions), rule_confidence in confidence.items():
  first_channel = features[premise]
  second_channel = features[conclusions]

  print(f"Rule: if a viewer liked {first_channel}, they will also like {second_channel}")
  print(f"Confidence: {100 * rule_confidence:.1f}%")
  print(f"Support:  {support[(premise, conclusions)]}\n")

Rule: if a viewer liked BuraaTech, they will also like VideoGameReviewer
Confidence: 65.9%
Support:  27

Rule: if a viewer liked VideoGameReviewer, they will also like BuraaTech
Confidence: 55.1%
Support:  27

Rule: if a viewer liked FunnyCatVideos, they will also like BuraaTech
Confidence: 25.9%
Support:  15

Rule: if a viewer liked BuraaTech, they will also like FunnyCatVideos
Confidence: 36.6%
Support:  15

Rule: if a viewer liked Investing101, they will also like FunnyCatVideos
Confidence: 54.5%
Support:  12

Rule: if a viewer liked Investing101, they will also like BuraaTech
Confidence: 22.7%
Support:  5

Rule: if a viewer liked FunnyCatVideos, they will also like Investing101
Confidence: 20.7%
Support:  12

Rule: if a viewer liked BuraaTech, they will also like Investing101
Confidence: 12.2%
Support:  5

Rule: if a viewer liked BuraaTech, they will also like MammothInteractive
Confidence: 29.3%
Support:  12

Rule: if a viewer liked MammothInteractive, they will also like BuraaTec

## What does the code above do?

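We compare every channel against every other channel, in both directions.
Each number below is a channel's index in `features` (0 = Investing101, 1 = FunnyCatVideos, 2 = BuraaTech, 3 = MammothInteractive, 4 = VideoGameReviewer), and each pair `premise-conclusion` is one candidate rule: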

0-1 0-2 0-3 0-4

1-0 1-2 1-3 1-4

2-0 2-1 2-3 2-4

3-0 3-1 3-2 3-4

4-0 4-1 4-2 4-3

and so on...

#### Now that we have the data, let's look at how we are going to use it

In [38]:
from pprint import pprint

# Show every (premise, conclusion) rule together with its support count
pprint([*support.items()])

[((2, 4), 27),
 ((4, 2), 27),
 ((1, 2), 15),
 ((2, 1), 15),
 ((0, 1), 12),
 ((0, 2), 5),
 ((1, 0), 12),
 ((2, 0), 5),
 ((2, 3), 12),
 ((3, 2), 12),
 ((3, 4), 12),
 ((4, 3), 12),
 ((1, 4), 17),
 ((4, 1), 17),
 ((1, 3), 11),
 ((3, 1), 11),
 ((0, 4), 7),
 ((4, 0), 7),
 ((0, 3), 4),
 ((3, 0), 4)]


In [41]:
from operator import itemgetter

# Rules ordered from highest to lowest support (the count is element 1 of
# each (rule, count) pair)
support_sorted = list(support.items())
support_sorted.sort(key=itemgetter(1), reverse=True)

def print_results(premise, conclusions, support, confidence, features):
  """Print one rule with its confidence (as a percentage) and its support.

  Args:
    premise: index into `features` of the channel the viewer liked.
    conclusions: index into `features` of the channel being predicted.
    support: mapping from (premise, conclusions) to the rule's support.
    confidence: mapping from (premise, conclusions) to a fraction; it is
      multiplied by 100 and printed with one decimal place.
    features: sequence of channel names, indexed by channel number.
  """
  rule = (premise, conclusions)
  premise_name, conclusion_name = features[premise], features[conclusions]

  print(f"Rule if a viewer like {premise_name}, they will like {conclusion_name}")

  print(f"Confidence: {100 * confidence[rule]:.1f}%")
  # The trailing "\n" leaves a blank line between consecutive rules
  print("Support:  {0}\n".format(support[rule]))
# Support is the raw number of viewers for whom the rule held.
# Show the five rules with the highest support.
print("Sorting the results by support:\n")
for rank in range(5):
  premise, conclusions = support_sorted[rank][0]
  print_results(premise, conclusions, support, confidence, features)


Sorting the results by support:

Rule if a viewer like BuraaTech, they will like VideoGameReviewer
Confidence: 65.9%
Support:  27

Rule if a viewer like VideoGameReviewer, they will like BuraaTech
Confidence: 55.1%
Support:  27

Rule if a viewer like FunnyCatVideos, they will like VideoGameReviewer
Confidence: 29.3%
Support:  17

Rule if a viewer like VideoGameReviewer, they will like FunnyCatVideos
Confidence: 34.7%
Support:  17

Rule if a viewer like FunnyCatVideos, they will like BuraaTech
Confidence: 25.9%
Support:  15



In [42]:
# Confidence is how strongly we expect a viewer who liked the premise channel
# to also like the conclusion channel. Show the five most confident rules.
print("Sorting by confidence: \n")
confidence_sorted = list(confidence.items())
confidence_sorted.sort(key=itemgetter(1), reverse=True)
for rank in range(5):
  premise, conclusions = confidence_sorted[rank][0]
  print_results(premise, conclusions, support, confidence, features)

Sorting by confidence: 

Rule if a viewer like BuraaTech, they will like VideoGameReviewer
Confidence: 65.9%
Support:  27

Rule if a viewer like VideoGameReviewer, they will like BuraaTech
Confidence: 55.1%
Support:  27

Rule if a viewer like Investing101, they will like FunnyCatVideos
Confidence: 54.5%
Support:  12

Rule if a viewer like MammothInteractive, they will like BuraaTech
Confidence: 50.0%
Support:  12

Rule if a viewer like MammothInteractive, they will like VideoGameReviewer
Confidence: 50.0%
Support:  12

