-
Notifications
You must be signed in to change notification settings - Fork 1
/
helper.py
88 lines (69 loc) · 2.62 KB
/
helper.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
import pandas as pd
import numpy as np
from collections import Counter
class User:
    """One viewer together with the films recorded for them.

    Attributes:
        id: identifier of the user, stored exactly as passed in.
        watched: dict keyed by film id. In this file ``Users.extract_users``
            fills it with ``vcId -> (max - min)`` of the ``fts`` values
            logged for that film.
    """

    # NOTE: the parameter name ``id`` shadows the builtin, but it is part of
    # the constructor's public signature, so it is kept for compatibility.
    def __init__(self, id, watched):
        self.id = id
        self.watched = watched

    def __repr__(self):
        return f"{type(self).__name__}(id={self.id!r}, watched={self.watched!r})"

    @property
    def number_watched_films(self):
        """Return how many distinct films are present in ``watched``."""
        # len() on the mapping itself; no need to materialise a keys view.
        return len(self.watched)
class Users:
    """Holds the BigTV viewing log and catalog and derives per-user data.

    Data flow between the methods:

    * ``load_dataset`` / ``load_catalog`` fill ``bigtv_dataset`` /
      ``bigtv_catalog`` from CSV files.
    * ``prepare_dataset`` and ``extract_users`` read ``bigtv_dataset``.
    * ``prepare_catalog`` reads ``bigtv_catalog``.
    * ``get_top_films`` reads ``users`` (filled by ``extract_users``).
    """

    def __init__(self):
        # Raw frames produced by pandas.read_csv; None until loaded.
        self.bigtv_dataset = None
        self.bigtv_catalog = None
        # List of User objects; None until extract_users() has run.
        self.users = None
        # Dict VcID -> [VcName, VcSeries]; None until prepare_catalog() has run.
        self.catalog = None

    def load_dataset(self, filename, sep):
        """Load BigTV dataset

        Arguments:
            filename {str} -- path (or buffer) handed to pandas.read_csv
            sep {str} -- field separator handed to pandas.read_csv
        """
        self.bigtv_dataset = pd.read_csv(filename, sep=sep)

    def load_catalog(self, filename, sep):
        """Load BigTV catalog

        Arguments:
            filename {str} -- path (or buffer) handed to pandas.read_csv
            sep {str} -- field separator handed to pandas.read_csv
        """
        self.bigtv_catalog = pd.read_csv(filename, sep=sep)

    def prepare_dataset(self):
        """Restrict the dataset to five columns and drop incomplete rows.

        ``self.bigtv_dataset`` is replaced by a frame that contains only
        ``fts``, ``vts``, ``uid``, ``vcId`` and ``serverTs`` and from which
        every row with at least one missing value has been removed.
        """
        columns = ['fts', 'vts', 'uid', 'vcId', 'serverTs']
        selected = self.bigtv_dataset[columns]
        self.bigtv_dataset = selected.dropna(axis=0, how="any")

    def extract_users(self):
        """Build ``self.users`` from ``self.bigtv_dataset``.

        One ``User`` is created per distinct ``uid``. Its ``watched`` dict
        maps each ``vcId`` the user has rows for to ``max - min`` of the
        ``fts`` values of those rows, each value first converted with
        ``int``. Users (and films inside a user) appear in the order in
        which they are first seen in the dataset.
        """
        self.users = []
        # groupby does the uid -> vcId regrouping in one pass instead of
        # rebuilding a DataFrame from single-row frames for every group.
        for uid, user_rows in self.bigtv_dataset.groupby('uid', sort=False):
            watched = {}
            for vc_id, film_rows in user_rows.groupby('vcId', sort=False):
                stamps = [int(x) for x in film_rows['fts']]
                # A film with a single row yields 0 here.
                watched[vc_id] = max(stamps) - min(stamps)
            self.users.append(User(uid, watched))

    def prepare_catalog(self):
        """Build ``self.catalog`` as ``{VcID: [VcName, VcSeries]}``.

        When a ``VcID`` occurs more than once in ``self.bigtv_catalog`` the
        last occurrence wins.
        """
        catalog = self.bigtv_catalog[['VcID', 'VcName', 'VcSeries']]
        # Column-wise zip keeps every column's own dtype; iterrows() would
        # coerce each row to a single common dtype.
        self.catalog = {
            vc_id: [name, series]
            for vc_id, name, series in zip(
                catalog['VcID'], catalog['VcName'], catalog['VcSeries']
            )
        }

    def extract_column(self, df, column):
        """Group the rows of a DataFrame by the values of one column.

        Arguments:
            df {pandas.DataFrame} -- input DataFrame
            column {str} -- df column

        Returns:
            dict -- maps every distinct value of ``column`` to a list with
            one single-column DataFrame per matching row (the row without
            ``column``, as produced by ``Series.to_frame``). The original
            rows of a group can be rebuilt with
            ``pd.concat(group, axis=1).transpose()``.
        """
        # Keyword form: the positional ``axis`` argument of drop() is no
        # longer accepted by pandas 2.x.
        remaining = df.drop(columns=column)
        groups = {}
        # Pair values and rows by position so that duplicated index labels
        # cannot turn the key lookup into a Series.
        for key, (_, row) in zip(df[column], remaining.iterrows()):
            groups.setdefault(key, []).append(row.to_frame())
        return groups

    def get_top_films(self, num=10):
        """Return the films present in the most users' ``watched`` dicts.

        Entries whose stored value is ``0`` are not counted (presumably a
        zero viewing duration -- confirm against the data definition).

        Arguments:
            num {int} -- maximum number of films to return (default: {10})

        Returns:
            list -- ``(film_id, user_count)`` tuples, highest count first;
            films with equal counts keep first-seen order.
        """
        counts = Counter()
        for user in self.users:
            # Filter on the dict *values*; iterating the dict directly
            # would only yield the film ids.
            counts.update(
                film for film, duration in user.watched.items()
                if duration != 0
            )
        return counts.most_common(num)

    def find_users_by_parametrs(self, watched=2, zero_duration_permited=True):
        """Placeholder: not implemented, always returns ``None``.

        TODO: the intended filtering by ``watched`` and
        ``zero_duration_permited`` is not defined anywhere in this file.
        """
        return None