In [3]:
import pandas as pd
import numpy as np
import random
from datetime import datetime, timedelta

In [4]:
# Generate random search data
n = 1000  # Number of rows

In [5]:
search_data = pd.DataFrame({
    'timestamp': [datetime(2024, 1, 1) + timedelta(minutes=random.randint(1, 10080)) for _ in range(n)],
    'session_id': [random.randint(1, 100) for _ in range(n)],
    'query': [random.choice(['shoes', 'electronics', 'clothing', 'home decor']) for _ in range(n)],
    'location': [random.choice(['New York', 'Los Angeles', 'Chicago', 'San Francisco']) for _ in range(n)],
    'user_id': [random.randint(1, 50) for _ in range(n)]
})

In [6]:
session_data = pd.DataFrame({
    'session_id': [random.randint(1, 100) for _ in range(n)],
    'platform': [random.choice(['IOS', 'ANDROID', 'WEB']) for _ in range(n)],
})

In [7]:
search_data.head()

Unnamed: 0,timestamp,session_id,query,location,user_id
0,2024-01-05 14:56:00,36,shoes,Los Angeles,38
1,2024-01-03 20:33:00,5,home decor,Chicago,50
2,2024-01-05 02:52:00,72,electronics,Chicago,39
3,2024-01-05 19:07:00,27,electronics,Los Angeles,37
4,2024-01-01 00:20:00,36,clothing,Chicago,41


In [8]:
session_data.head()

Unnamed: 0,session_id,platform
0,83,ANDROID
1,60,WEB
2,8,ANDROID
3,95,IOS
4,74,WEB


Выведи на экран 10 самых популярных запросов 

In [None]:
query_count = search_data.dropna(subset=['query'])\
    .groupby('query', as_index=False)\
    .size()\
    .rename(columns={'size': 'query_count'})\
    .sort_values('query_count', ascending = [False])\
    .head(10)

In [27]:
query_count

Unnamed: 0,query,query_count
0,clothing,266
2,home decor,256
1,electronics,243
3,shoes,235


Выведи на экран кол-во пользователей IOS по городам

In [39]:
ios_location = search_data.merge(\
    session_data,\
    how='left',\
    on='session_id')\
    .dropna(subset=['platform'])\
    .query('platform == "IOS"')\
    .groupby('location', as_index=False)\
    .agg({'user_id': 'nunique'})\
    .rename(columns={'user_id': 'count_user'})


In [40]:
ios_location

Unnamed: 0,location,count_user
0,Chicago,50
1,Los Angeles,50
2,New York,50
3,San Francisco,50


Посчитай долю уникальных пользователей с запросом 'shoes' по платформам

In [47]:
merged = search_data.merge(
    session_data,
    how='left',
    on='session_id')\
    .dropna(subset=['platform'])

unique_shoes = merged.groupby('platform', as_index=False)\
    .agg(
        shoes_users=('user_id', lambda x: x[merged['query'] == 'shoes'].nunique()),
        total_users=('user_id', 'nunique')
    )\
    .assign(share=lambda x: x['shoes_users'] / x['total_users'])

In [51]:
shoes_users = search_data.merge(
    session_data,
    how='left',
    on='session_id')\
    .dropna(subset=['platform'])\
    .query('query == "shoes"')\
    .groupby('platform', as_index=False)\
    .agg(shoes_users=('user_id', 'nunique'))

total_users = search_data.merge(
    session_data,
    how='left',
    on='session_id')\
    .dropna(subset=['platform'])\
    .groupby('platform', as_index=False)\
    .agg(total_users=('user_id', 'nunique'))

unique_shoes = shoes_users.merge(
    total_users,
    how='left',
    on='platform')\
    .assign(share=lambda x: x['shoes_users'] / x['total_users'])

In [52]:
unique_shoes

Unnamed: 0,platform,shoes_users,total_users,share
0,ANDROID,50,50,1.0
1,IOS,50,50,1.0
2,WEB,50,50,1.0


In [53]:
total_users = search_data['user_id'].nunique()

In [54]:
total_users

50