**Aram Adamyan**

In [1]:
import pandas as pd
from datetime import datetime

# Understanding the Data 

In [2]:
# Load the raw user-survey interaction log from a CSV in the working directory.
df = pd.read_csv("recsys_data.csv")

In [3]:
# Preview the loaded frame; pandas abbreviates large frames to their first and last rows.
df

Unnamed: 0,user,survey,status,date
0,46120029,100000000,0,2020-06-25 16:08:06
1,61685181,100000001,0,2020-06-24 12:29:43
2,61900560,100000002,0,2020-06-24 20:35:37
3,26620554,100000003,0,2020-06-24 22:04:23
4,61973223,100000004,1,2020-06-25 15:15:02
...,...,...,...,...
1313319,60022440,100001086,0,2020-06-27 15:50:06
1313320,61899999,100019505,1,2020-06-27 09:26:41
1313321,58757238,100003562,0,2020-06-27 13:22:35
1313322,47976315,100002418,0,2020-06-27 19:27:23


In [4]:
# Inspect column dtypes; "date" is read in as a generic object column and is converted later.
df.dtypes

user       int64
survey     int64
status     int64
date      object
dtype: object

In [5]:
# Count missing values per column.
df.isna().sum()

user      0
survey    0
status    0
date      0
dtype: int64

In [6]:
# Number of distinct users in the full log.
df["user"].nunique()

83126

In [7]:
# Number of distinct surveys in the full log.
df["survey"].nunique()

42445

In [8]:
# Number of distinct users that have at least one row with status == 1.
df.loc[df["status"] == 1, "user"].nunique()

48474

In [9]:
# Number of distinct surveys that have at least one row with status == 1.
df.loc[df["status"] == 1, "survey"].nunique()

19187

# Modifying and filtering the dataframe

In [10]:
# Convert the "date" column from strings to datetime values so rows can be ordered by time.
df["date"] = pd.to_datetime(df["date"])

In [11]:
# Re-check dtypes to confirm the "date" column was converted.
df.dtypes

user               int64
survey             int64
status             int64
date      datetime64[ns]
dtype: object

In [12]:
# Build the "recent completions" frame used for ranking surveys.
# The positional cut at row 170000 is what the author describes as "the last
# 14 hours"; the displayed rows run from 2020-06-27 14:26 to 2020-06-28 04:16.
df1 = (
    df.loc[df["status"] == 1]   # completed rows only
    .sort_values(by="date")     # oldest first
    .reset_index()              # previous row labels are kept in an "index" column
    .iloc[170000:, :]           # keep everything from position 170000 onward
)
df1

Unnamed: 0,index,user,survey,status,date
170000,1289395,58312209,100002748,1,2020-06-27 14:26:12
170001,909674,62004357,100018638,1,2020-06-27 14:26:13
170002,198986,53128233,100007562,1,2020-06-27 14:26:17
170003,682812,61334820,100003689,1,2020-06-27 14:26:17
170004,1256109,52709025,100002144,1,2020-06-27 14:26:19
...,...,...,...,...,...
212122,847771,61891191,100028890,1,2020-06-28 04:15:56
212123,1065870,62076915,100003691,1,2020-06-28 04:15:57
212124,1002493,61928229,100007062,1,2020-06-28 04:16:02
212125,870213,60606300,100028055,1,2020-06-28 04:16:06


In [13]:
# Completion count per survey within df1, most-completed first.
# to_string() renders every row instead of pandas' abbreviated preview.
print(
    df1.survey
    .value_counts()
    .to_string()
)

100001069    692
100002169    589
100003774    460
100004369    457
100002577    421
100003670    417
100000111    370
100000463    349
100007366    345
100010884    316
100000136    301
100002971    287
100000979    286
100005554    282
100005288    281
100003728    277
100000025    277
100000034    272
100000891    262
100006379    258
100001244    258
100006055    253
100003689    252
100000355    242
100001588    217
100001766    197
100000723    197
100006888    180
100001957    176
100000165    174
100002726    172
100000557    171
100001564    166
100007809    164
100000264    163
100011057    161
100009799    156
100007120    153
100002144    149
100002561    149
100002207    143
100003075    143
100003046    140
100007261    140
100000084    140
100001850    139
100001952    139
100000009    135
100018391    135
100002319    131
100007675    127
100001086    127
100002861    127
100001593    121
100004895    120
100003644    118
100009108    116
100002277    116
100007191    1

# Logic of recommendation

In [14]:
def recommender(userId, n=3):
    """Recommend the most-completed recent surveys the user has not been given yet.

    Reads two module-level frames: ``df1`` (the recently completed rows)
    supplies the popularity ranking, and ``df`` (the full log) supplies every
    survey already associated with the user, whatever its status.

    Parameters
    ----------
    userId : int
        Value to look up in ``df['user']``.
    n : int, default 3
        Maximum number of surveys to return.

    Returns
    -------
    set
        Up to ``n`` survey ids. A set is unordered, so the popularity ranking
        is not preserved in the result. Fewer than ``n`` ids are returned when
        there are not enough unseen surveys. A user with no rows in ``df``
        simply receives the ``n`` most-completed surveys.
    """
    # value_counts() sorts by count in descending order, so its index is the ranking.
    ranked_surveys = df1['survey'].value_counts().index.tolist()

    # A set gives O(1) membership checks. The earlier nested add/remove scan
    # could raise KeyError when the user's first survey matched a candidate
    # that had never been added.
    seen_surveys = set(df.loc[df['user'] == userId, 'survey'])

    recommended = set()
    for survey_id in ranked_surveys:
        if len(recommended) >= n:
            break
        if survey_id not in seen_surveys:
            recommended.add(survey_id)
    return recommended

In [15]:
# Example: recommendations for a user id that appears in the df1 preview.
recommender(62004357)

{100001069, 100002169, 100003774}

**The outer loop walks through the unique completed survey IDs, and the inner loop compares each one with every survey of the given user. If there is a match we skip that survey; if there is no match we add it to our set. When the set reaches 3 elements we break out of the loops and return the set.
NOTE: the surveys are ordered from highest completion count to lowest, and only surveys completed within the last 14 hours of the given data are considered.**

# END