This is similar to the event file created for creating feature split but also includes a probability distirbution for click in order to get click events activated

## Setup

In [3]:
import random 
import numpy as np
import pandas as pd
import gzip
import jsonlines
import datetime



### Now, instead of 1000 interactions for 10 users and 10 items, we have 20 interactions for 10 users/10 items 

In [1]:

no_of_interactions  = 20
no_of_rankings = 10
no_of_users =  10
no_of_items = 10
no_of_items_per_ranking =  10

time_start=  1638316800000   ## 2021-12-01 00:00:00
time_end =  1640908800000  ## 2021-12-31 00:00:00

In [4]:
sample_prob =  pd.read_csv('sample_probabilities.csv')
sample_prob.users = sample_prob['users'].astype(str)
sample_prob2 =  sample_prob.copy()
sample_prob2.users = np.array(['6','7','8','9','10'])
sample_prob2.columns = ['P_users', 'users', '6','7','8','9','10']

In [5]:
sample_prob

Unnamed: 0,P_users,users,1,2,3,4,5
0,0.4,1,0.17,0.2,0.19,0.25,0.19
1,0.2,2,0.28,0.15,0.19,0.2,0.18
2,0.2,3,0.28,0.15,0.19,0.2,0.18
3,0.1,4,0.41,0.15,0.15,0.14,0.15
4,0.1,5,0.41,0.15,0.15,0.14,0.15


The above is the probablity distribution for users 1-5 for items 1-5. e.g. User 1 has probability of picking item 1 of 0.17. These users have no probability of picking items 5-10

In [10]:
sample_prob2

Unnamed: 0,P_users,users,6,7,8,9,10
0,0.4,6,0.17,0.2,0.19,0.25,0.19
1,0.2,7,0.28,0.15,0.19,0.2,0.18
2,0.2,8,0.28,0.15,0.19,0.2,0.18
3,0.1,9,0.41,0.15,0.15,0.14,0.15
4,0.1,10,0.41,0.15,0.15,0.14,0.15


The above is the probablity distribution for users 1-5 for items 6-10. e.g. User 6 has probability 0.17 of picking item 6. These users have no probability of picking items 1-5

### Description:

- Two kinds of users : 
    - Users with crop = 'kharif': They click on only items 1-5 
    - Users with crop = 'rabi' : They click on only items 6-10
- Two kinds of items : 
    - Items 1-5: They have fertilizer = 'fertrilizer_1' and persticide in {'pesticide_2,pesticide_3}
    - Items 6-10: They have fertilizer in {'fertilizer_1,fertilizer_3'} and pesticide = pesticide_1

### Creating users : 

In [6]:
user_dic1 = []

def condition_crop(id1):
    if id1 < no_of_users/2:
        crop = 'kharif'
    else:
        crop = 'rabi'
    return(crop)


for id1 in range(no_of_users):
   
    user_dic1.append({
        "event": "user",
        "id": str(id1 + 1),
        "timestamp":  time_start,
        "user": str(id1+1),
        "fields": [{
                "name": "land_area",
                "value": random.randint(100,250)/100.00
                },
            {
                "name": "crops",
                "value": condition_crop(id1)
                }]
    })

user_dic1[4:6]

[{'event': 'user',
  'id': '5',
  'timestamp': 1638316800000,
  'user': '5',
  'fields': [{'name': 'land_area', 'value': 1.23},
   {'name': 'crops', 'value': 'kharif'}]},
 {'event': 'user',
  'id': '6',
  'timestamp': 1638316800000,
  'user': '6',
  'fields': [{'name': 'land_area', 'value': 1.69},
   {'name': 'crops', 'value': 'rabi'}]}]

### Creating items:

In [7]:
item_dic1 = []

def condition_pesticide(id1):
    if id1 < no_of_items/2:
        pesticide = 'pesticide_1'
    else:
        pesticide = random.choice(['pesticide_2','pesticide_3'])
    return(pesticide)

def condition_fertilizer(id1):
    if id1 < no_of_items/2:
        fertilizer = random.choice(['fertilizer_2','fertilizer_3'])
    else:
        fertilizer = 'fertilizer_1'
    return(fertilizer)

for id1 in range(no_of_items):
   
    item_dic1.append({
        "event": "item",
        "id": str(id1 + 1),
        "timestamp":  time_start,
        "item": str(id1+1),
        "fields": [{
                "name": "pesticide",
                "value": condition_pesticide(id1)
                },
            {
                "name": "fertilizer",
                "value": condition_fertilizer(id1)
                }]
    })

item_dic1[0:2]

[{'event': 'item',
  'id': '1',
  'timestamp': 1638316800000,
  'item': '1',
  'fields': [{'name': 'pesticide', 'value': 'pesticide_1'},
   {'name': 'fertilizer', 'value': 'fertilizer_3'}]},
 {'event': 'item',
  'id': '2',
  'timestamp': 1638316800000,
  'item': '2',
  'fields': [{'name': 'pesticide', 'value': 'pesticide_1'},
   {'name': 'fertilizer', 'value': 'fertilizer_2'}]}]

### Creating rankings dictionary

In [8]:
## creating 10 rankings for 10 random users based on 10 items randomly placed

ranking_dic1 = []
for id1 in range(no_of_rankings):
    user_id = str(id1 + 1)
    ranking_id =  random.sample(range(1,no_of_items+1),no_of_items_per_ranking)
    rank_list= []
    for id2 in range(no_of_rankings):
        rank_list.append({
                "id" : str(ranking_id[id2]),
                "relevancy": 0.0
            })
    ranking_dic1.append({
        "event": "ranking",
        "fields":[],
        "id": str(id1 + 1),
        "items": rank_list,
        "session": str(user_id),
        "tenant": "default",
        "timestamp":  time_start,
        "user": str(user_id)
    })
    
ranking_dic1[0:2]

[{'event': 'ranking',
  'fields': [],
  'id': '1',
  'items': [{'id': '3', 'relevancy': 0.0},
   {'id': '7', 'relevancy': 0.0},
   {'id': '8', 'relevancy': 0.0},
   {'id': '5', 'relevancy': 0.0},
   {'id': '9', 'relevancy': 0.0},
   {'id': '2', 'relevancy': 0.0},
   {'id': '4', 'relevancy': 0.0},
   {'id': '1', 'relevancy': 0.0},
   {'id': '10', 'relevancy': 0.0},
   {'id': '6', 'relevancy': 0.0}],
  'session': '1',
  'tenant': 'default',
  'timestamp': 1638316800000,
  'user': '1'},
 {'event': 'ranking',
  'fields': [],
  'id': '2',
  'items': [{'id': '5', 'relevancy': 0.0},
   {'id': '6', 'relevancy': 0.0},
   {'id': '1', 'relevancy': 0.0},
   {'id': '10', 'relevancy': 0.0},
   {'id': '8', 'relevancy': 0.0},
   {'id': '3', 'relevancy': 0.0},
   {'id': '7', 'relevancy': 0.0},
   {'id': '2', 'relevancy': 0.0},
   {'id': '4', 'relevancy': 0.0},
   {'id': '9', 'relevancy': 0.0}],
  'session': '2',
  'tenant': 'default',
  'timestamp': 1638316800000,
  'user': '2'}]

### Creating interactions dictionary

In [9]:
## creating 50 interactions such that the first 5 users (id - 1 to 5)  interact only with first 5 items(id - 1 to 5) :

interaction_dic1 = []
for id1 in range(int(no_of_interactions/2)):
    prob = sample_prob.P_users.values
    user_id = str(np.random.choice(sample_prob.users,size = 1,p = prob)[0])
    ranking_id = [x['id'] for x in ranking_dic1 if x['user']== user_id][0]
    prob_item = sample_prob.loc[sample_prob.users == user_id,sample_prob.columns[2:sample_prob.shape[1]]].values[0]
    item_id = str(np.random.choice(sample_prob.columns[2:sample_prob.shape[1]].values,size = 1,p = prob_item)[0])
    interaction_dic1.append({
        "event": "interaction",
        "fields":[],
        "id": str(id1 + 1),
        "item": item_id,
        "ranking":  ranking_id,
        "session": str(user_id),
        "tenant": "default",
        "timestamp":  random.randint(time_start+1,time_end),
        "type":"click",
        "user": str(user_id)
    })
    
## creating 50 interactions such that the other 5 users (id - 5 to 10)  interact only with the other 5 items (id - 5 to 10)
    
interaction_dic2 = []
for id1 in range(int(no_of_interactions/2),no_of_interactions):
    user_id = str(np.random.choice(sample_prob2.users,size = 1,p = prob)[0])
    ranking_id = [x['id'] for x in ranking_dic1 if x['user']== user_id][0]
    prob_item = sample_prob2.loc[sample_prob2.users == user_id,sample_prob2.columns[2:sample_prob2.shape[1]]].values[0]
    item_id = str(np.random.choice(sample_prob2.columns[2:sample_prob2.shape[1]].values,size = 1,p = prob_item)[0])
    interaction_dic2.append({
        "event": "interaction",
        "fields":[],
        "id": str(id1 + 1),
        "item": item_id,
        "ranking":  ranking_id,
        "session": str(user_id),
        "tenant": "default",
        "timestamp":  random.randint(time_start+1,time_end),
        "type":"click",
        "user": str(user_id)
    })

In [10]:
interaction_dic = interaction_dic1.copy()
interaction_dic.extend(interaction_dic2)
interaction_dic[500:502]

[]

### Combining the events created :  

In [11]:
## functions for writing into gzip 
def writeall_jsonl_gz(filename, payload ):
    with gzip.open(filename, 'wb') as fp:
        json_writer = jsonlines.Writer(fp)
        json_writer.write_all(payload)


## combining interactions nad rankings:
events_list =  interaction_dic.copy()
events_list.extend(ranking_dic1)

##combining users and items:
events_list_doc=  events_list.copy()
events_list_doc.extend( user_dic1)
events_list_doc.extend (item_dic1)

##saving the created events: 
filename = 'data/event_kharif_rabi_with_clicks_20_interactions.jsonl.gz'
payload  = events_list_doc
writeall_jsonl_gz(filename, payload )

### Model after training: 

![kharif_Rabi_20_interactions.png](attachment:kharif_Rabi_20_interactions.png)

clicks not picked up

### config file:

```
bootstrap:
  source:
    type: file
    path: /data/events
  workdir: /data/bootstrap

inference:
  port: 8080
  host: "0.0.0.0"
  source:
    type: rest
  state:
    type: redis
    host: redis
    format: json

models:
  xgboost:
    type: lambdamart
    path: /data/xgboost.model
    backend:
      type: xgboost
      iterations: 10
      seed: 0
    weights:
      click: 1
    features:
      - pesticide
      - fertilizer
      - crops
      - land_area
      - ctr
      - liked_fertilizer
      - visitor_click_count
      - global_item_click_count
      - day_item_click_count
features:
  - name: pesticide
    type: string
    scope: item
    source: item.pesticide
    values:
      - pesticide_1
      - pesticide_2
      - pesticide_3

  - name: fertilizer
    type: string
    scope: item
    source: item.fertilizer
    values:
      - fertilizer_1
      - fertilizer_2
      - fertilizer_3

  - name: crops
    type: string
    scope: user
    source: user.crops
    values:
      - rabi
      - kharif

  - name: land_area
    type: number
    scope: user
    source: user.land_area

  - name: ctr
    type: rate
    top: click
    bottom: impression
    scope: item
    bucket: 24h
    periods: [7,30]

  - name: liked_fertilizer
    type: interacted_with
    interaction: click
    field: item.fertilizer
    scope: session
    count: 10
    duration: 24h

  - name: visitor_click_count
    type: interaction_count
    interaction: click
    scope: session

  - name: global_item_click_count
    type: interaction_count
    interaction: click
    scope: item

  - name: day_item_click_count
    type: window_count
    interaction: click
    scope: item
    bucket: 24h
    periods: [7,30]
```