### Importing the Liberaries

In [99]:
import pandas as pd
import numpy as np
import datetime

### Loading the Data Set 

In [100]:
# File Path
file_path = "Analytics Position Case Study.xlsx"

#Loading "Deposit Data" Sheet from the file .
deposit_df = pd.read_excel(file_path, sheet_name="Deposit Data",header=3)

#Loading "Withdrawal Data" Sheet from the file .
withdrawal_df = pd.read_excel(file_path, sheet_name="Withdrawal Data",header=3)

#Loading "User Gameplay data" Sheet from the file .
gameplay_df = pd.read_excel(file_path, sheet_name="User Gameplay data",header=3)

In [101]:
#Deposit Data Sheet
deposit_df

Unnamed: 0,User Id,Datetime,Amount
0,357,2022-01-10 00:03:00,2000
1,776,2022-01-10 00:03:00,2500
2,492,2022-01-10 00:06:00,5000
3,803,2022-01-10 00:07:00,5000
4,875,2022-01-10 00:09:00,1500
...,...,...,...
17433,654,2022-10-31 23:57:00,1200
17434,980,2022-10-31 23:58:00,200
17435,2,2022-10-31 23:58:00,40000
17436,612,2022-10-31 23:58:00,2800


In [102]:
#Withdrawal Data Sheet
withdrawal_df

Unnamed: 0,User Id,Datetime,Amount
0,190,2022-01-10 00:03:00,5872
1,159,2022-01-10 00:16:00,9540
2,164,2022-01-10 00:24:00,815
3,946,2022-01-10 00:29:00,23000
4,763,2022-01-10 00:40:00,9473
...,...,...,...
3561,559,2022-10-31 23:27:00,5000
3562,407,2022-10-31 23:51:00,3000
3563,389,2022-10-31 23:56:00,14481
3564,11,2022-10-31 23:57:00,4000


In [103]:
#gameplay Data Sheet
gameplay_df

Unnamed: 0,User ID,Games Played,Datetime
0,851,1,2022-01-10 00:00:00
1,717,1,2022-01-10 00:00:00
2,456,1,2022-01-10 00:00:00
3,424,1,2022-01-10 00:00:00
4,845,1,2022-01-10 00:00:00
...,...,...,...
355261,658,1,2022-10-31 23:59:00
355262,582,1,2022-10-31 23:59:00
355263,272,1,2022-10-31 23:59:00
355264,563,1,2022-10-31 23:59:00


### Convert Date Columns to Datetime

In [104]:
deposit_df["Datetime"] = pd.to_datetime(deposit_df["Datetime"])
withdrawal_df["Datetime"] = pd.to_datetime(withdrawal_df["Datetime"])
gameplay_df["Datetime"] = pd.to_datetime(gameplay_df["Datetime"])

### Create Slot Labels : Playerwise Loyalty Points for Specific Slots
Split each date into Slot 1 (12am–12pm) and Slot 2 (12pm–12am):

In [105]:
def assign_slot(dt):
    if dt.time() < datetime.time(12, 0, 0):
        return "S1"
    else:
        return "S2"

for df in [deposit_df, withdrawal_df, gameplay_df]:
    df["Date"] = df["Datetime"].dt.date
    df["Slot"] = df["Datetime"].apply(assign_slot)

#Alternative solution
    #df["Datetime"] = pd.to_datetime(df["Datetime"])
    #df["Date"] = df["Datetime"].dt.date
    #df["Slot"] = df["Datetime"].apply(lambda x: "S1" if x.time() < datetime.time(12, 0, 0) else "S2")

In [106]:
deposit_df.head(1)

Unnamed: 0,User Id,Datetime,Amount,Date,Slot
0,357,2022-01-10 00:03:00,2000,2022-01-10,S1


In [107]:
withdrawal_df.head(1)

Unnamed: 0,User Id,Datetime,Amount,Date,Slot
0,190,2022-01-10 00:03:00,5872,2022-01-10,S1


In [108]:
gameplay_df.head(1)

Unnamed: 0,User ID,Games Played,Datetime,Date,Slot
0,851,1,2022-01-10,2022-01-10,S1


In [109]:
for dfd in [deposit_df, withdrawal_df, gameplay_df]:
    dfd["Date"] = dfd['Datetime'].dt.strftime('%Y-%m-%d')
    dfd["Time"] = dfd['Datetime'].dt.strftime('%H:%M:%S')

In [136]:
deposit_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 17438 entries, 0 to 17437
Data columns (total 6 columns):
 #   Column    Non-Null Count  Dtype         
---  ------    --------------  -----         
 0   User Id   17438 non-null  int64         
 1   Datetime  17438 non-null  datetime64[ns]
 2   Amount    17438 non-null  int64         
 3   Date      17438 non-null  datetime64[ns]
 4   Slot      17438 non-null  object        
 5   Time      17438 non-null  datetime64[ns]
dtypes: datetime64[ns](3), int64(2), object(1)
memory usage: 817.5+ KB


In [111]:
for dft in [deposit_df, withdrawal_df, gameplay_df]:
    dft["Date"] = pd.to_datetime(dft["Date"],format='%Y-%m-%d')
    dft["Time"] = pd.to_datetime(dft["Time"],format='%H:%M:%S')

In [112]:
deposit_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 17438 entries, 0 to 17437
Data columns (total 6 columns):
 #   Column    Non-Null Count  Dtype         
---  ------    --------------  -----         
 0   User Id   17438 non-null  int64         
 1   Datetime  17438 non-null  datetime64[ns]
 2   Amount    17438 non-null  int64         
 3   Date      17438 non-null  datetime64[ns]
 4   Slot      17438 non-null  object        
 5   Time      17438 non-null  datetime64[ns]
dtypes: datetime64[ns](3), int64(2), object(1)
memory usage: 817.5+ KB


### Filter for Target Dates

Find Playerwise Loyalty points earned by Players in the following slots:-

    a. 2nd October Slot S1


In [113]:
deposit_df["Date"].unique()

<DatetimeArray>
['2022-01-10 00:00:00', '2022-02-10 00:00:00', '2022-03-10 00:00:00',
 '2022-04-10 00:00:00', '2022-05-10 00:00:00', '2022-06-10 00:00:00',
 '2022-07-10 00:00:00', '2022-08-10 00:00:00', '2022-09-10 00:00:00',
 '2022-10-10 00:00:00', '2022-11-10 00:00:00', '2022-12-10 00:00:00',
 '2022-10-13 00:00:00', '2022-10-14 00:00:00', '2022-10-15 00:00:00',
 '2022-10-16 00:00:00', '2022-10-17 00:00:00', '2022-10-18 00:00:00',
 '2022-10-19 00:00:00', '2022-10-20 00:00:00', '2022-10-21 00:00:00',
 '2022-10-22 00:00:00', '2022-10-23 00:00:00', '2022-10-24 00:00:00',
 '2022-10-25 00:00:00', '2022-10-26 00:00:00', '2022-10-27 00:00:00',
 '2022-10-28 00:00:00', '2022-10-29 00:00:00', '2022-10-30 00:00:00',
 '2022-10-31 00:00:00']
Length: 31, dtype: datetime64[ns]

Since there is no date on 2nd Oct 2022 . Instead i will compute with 10 Oct 2022

In [114]:
filtered_oct10_dep = deposit_df[
    (deposit_df["Date"] == "2022-10-10") &
    (deposit_df["Slot"] == "S2")]

In [115]:
filtered_oct10_dep

Unnamed: 0,User Id,Datetime,Amount,Date,Slot,Time
5485,294,2022-10-10 12:03:00,18000,2022-10-10,S2,1900-01-01 12:03:00
5486,80,2022-10-10 12:03:00,5900,2022-10-10,S2,1900-01-01 12:03:00
5487,763,2022-10-10 12:06:00,2000,2022-10-10,S2,1900-01-01 12:06:00
5488,202,2022-10-10 12:09:00,25000,2022-10-10,S2,1900-01-01 12:09:00
5489,980,2022-10-10 12:10:00,2500,2022-10-10,S2,1900-01-01 12:10:00
...,...,...,...,...,...,...
5763,566,2022-10-10 23:43:00,35000,2022-10-10,S2,1900-01-01 23:43:00
5764,296,2022-10-10 23:46:00,1250,2022-10-10,S2,1900-01-01 23:46:00
5765,510,2022-10-10 23:48:00,3500,2022-10-10,S2,1900-01-01 23:48:00
5766,343,2022-10-10 23:51:00,7500,2022-10-10,S2,1900-01-01 23:51:00


In [116]:
filtered_oct10_wit = withdrawal_df[
    (withdrawal_df["Date"] == "2022-10-10") &
    (withdrawal_df["Slot"] == "S2")]

In [117]:
filtered_oct10_wit

Unnamed: 0,User Id,Datetime,Amount,Date,Slot,Time
1095,920,2022-10-10 12:16:00,110950,2022-10-10,S2,1900-01-01 12:16:00
1096,159,2022-10-10 12:48:00,8528,2022-10-10,S2,1900-01-01 12:48:00
1097,157,2022-10-10 13:03:00,21111,2022-10-10,S2,1900-01-01 13:03:00
1098,497,2022-10-10 13:07:00,15793,2022-10-10,S2,1900-01-01 13:07:00
1099,666,2022-10-10 13:32:00,2000,2022-10-10,S2,1900-01-01 13:32:00
...,...,...,...,...,...,...
1155,185,2022-10-10 23:08:00,1402,2022-10-10,S2,1900-01-01 23:08:00
1156,148,2022-10-10 23:09:00,15000,2022-10-10,S2,1900-01-01 23:09:00
1157,201,2022-10-10 23:10:00,13275,2022-10-10,S2,1900-01-01 23:10:00
1158,380,2022-10-10 23:20:00,7000,2022-10-10,S2,1900-01-01 23:20:00


In [118]:
filtered_oct10_gam = gameplay_df[
    (gameplay_df["Date"] == "2022-10-10") &
    (gameplay_df["Slot"] == "S2")]

In [119]:
filtered_oct10_gam

Unnamed: 0,User ID,Games Played,Datetime,Date,Slot,Time
109227,181,1,2022-10-10 12:00:00,2022-10-10,S2,1900-01-01 12:00:00
109228,39,1,2022-10-10 12:00:00,2022-10-10,S2,1900-01-01 12:00:00
109229,536,1,2022-10-10 12:00:00,2022-10-10,S2,1900-01-01 12:00:00
109230,765,1,2022-10-10 12:00:00,2022-10-10,S2,1900-01-01 12:00:00
109231,663,1,2022-10-10 12:00:00,2022-10-10,S2,1900-01-01 12:00:00
...,...,...,...,...,...,...
114864,738,1,2022-10-10 23:59:00,2022-10-10,S2,1900-01-01 23:59:00
114865,946,1,2022-10-10 23:59:00,2022-10-10,S2,1900-01-01 23:59:00
114866,318,1,2022-10-10 23:59:00,2022-10-10,S2,1900-01-01 23:59:00
114867,710,1,2022-10-10 23:59:00,2022-10-10,S2,1900-01-01 23:59:00


In [120]:
# Example for deposits
deposit_summary_oct10 = filtered_oct10_dep.groupby("User Id")["Amount"].agg(["sum", "count"])
deposit_summary_oct10.columns = ["Deposit_Amount", "Num_Deposits"]


In [121]:
deposit_summary

Unnamed: 0_level_0,Deposit_Amount,Num_Deposits
User Id,Unnamed: 1_level_1,Unnamed: 2_level_1
5,1000,2
9,1995,1
16,3500,1
25,3000,1
30,10000,1
...,...,...
972,1500,3
978,35001,1
980,2800,2
985,2000,1


In [122]:
# Example for withdrawal
withdrawal_summary_oct10 = filtered_oct10_wit.groupby("User Id")["Amount"].agg(["sum", "count"])
withdrawal_summary_oct10.columns = ["Withdrawal_Amount", "Num_Withdrawal"]

In [123]:
withdrawal_summary_oct10

Unnamed: 0_level_0,Withdrawal_Amount,Num_Withdrawal
User Id,Unnamed: 1_level_1,Unnamed: 2_level_1
2,42000,1
16,7500,1
33,7000,1
47,5900,1
60,7000,1
64,2000,1
78,25000,1
93,10100,1
112,2116,1
114,7000,1


In [124]:
# Example for gameplay
gameplay_summary_oct10 = filtered_oct10_gam.groupby("User ID").size().reset_index(name="Games_Played")
gameplay_summary_oct10


Unnamed: 0,User ID,Games_Played
0,0,1
1,2,2
2,5,5
3,6,1
4,8,8
...,...,...
530,991,1
531,992,34
532,996,11
533,997,2


### Merge All Summaries


In [125]:
merged_df_oct10 = deposit_summary_oct10.merge(withdrawal_summary_oct10, on="User Id", how="outer")
merged_df_oct10 = pd.merge(merged_df_oct10,gameplay_summary_oct10,left_on="User Id",right_on="User ID",how="outer")
merged_df_oct10 = merged_df_oct10.fillna(0)

In [126]:
merged_df_oct10

Unnamed: 0,Deposit_Amount,Num_Deposits,Withdrawal_Amount,Num_Withdrawal,User ID,Games_Played
0,0.0,0.0,0.0,0.0,0.0,1.0
1,0.0,0.0,42000.0,1.0,2.0,2.0
2,1000.0,2.0,0.0,0.0,5.0,5.0
3,0.0,0.0,0.0,0.0,6.0,1.0
4,0.0,0.0,0.0,0.0,8.0,8.0
...,...,...,...,...,...,...
628,0.0,0.0,0.0,0.0,991.0,1.0
629,0.0,0.0,0.0,0.0,992.0,34.0
630,4000.0,1.0,0.0,0.0,996.0,11.0
631,0.0,0.0,0.0,0.0,997.0,2.0


## Apply Loyalty Point Formula

In [128]:
merged_df_oct10["Loyalty Points"] = (
    0.01 * merged_df_oct10["Deposit_Amount"] +
    0.005 * merged_df_oct10["Withdrawal_Amount"] +
    0.001 * (merged_df_oct10["Num_Deposits"] - merged_df_oct10["Num_Withdrawal"]).clip(lower=0) +
    0.2 * merged_df_oct10["Games_Played"]
)


In [129]:
merged_df_oct10

Unnamed: 0,Deposit_Amount,Num_Deposits,Withdrawal_Amount,Num_Withdrawal,User ID,Games_Played,Loyalty Points
0,0.0,0.0,0.0,0.0,0.0,1.0,0.200
1,0.0,0.0,42000.0,1.0,2.0,2.0,210.400
2,1000.0,2.0,0.0,0.0,5.0,5.0,11.002
3,0.0,0.0,0.0,0.0,6.0,1.0,0.200
4,0.0,0.0,0.0,0.0,8.0,8.0,1.600
...,...,...,...,...,...,...,...
628,0.0,0.0,0.0,0.0,991.0,1.0,0.200
629,0.0,0.0,0.0,0.0,992.0,34.0,6.800
630,4000.0,1.0,0.0,0.0,996.0,11.0,42.201
631,0.0,0.0,0.0,0.0,997.0,2.0,0.400


In [134]:
merged_df_oct10["User ID"] = merged_df_oct10["User ID"].astype(int)

merged_df_oct10["Num_Deposits"] = merged_df_oct10["Num_Deposits"].astype(int)

merged_df_oct10["Num_Withdrawal"] = merged_df_oct10["Num_Withdrawal"].astype(int)

merged_df_oct10["Games_Played"] = merged_df_oct10["Games_Played"].astype(int)

desired_order = [
    "User ID",
    "Games_Played",
    "Deposit_Amount",
    "Num_Deposits",
    "Withdrawal_Amount",
    "Num_Withdrawal",
    "Loyalty Points"
]

merged_df_oct10 = merged_df_oct10[desired_order]

In [135]:
merged_df_oct10

Unnamed: 0,User ID,Games_Played,Deposit_Amount,Num_Deposits,Withdrawal_Amount,Num_Withdrawal,Loyalty Points
0,0,1,0.0,0,0.0,0,0.200
1,2,2,0.0,0,42000.0,1,210.400
2,5,5,1000.0,2,0.0,0,11.002
3,6,1,0.0,0,0.0,0,0.200
4,8,8,0.0,0,0.0,0,1.600
...,...,...,...,...,...,...,...
628,991,1,0.0,0,0.0,0,0.200
629,992,34,0.0,0,0.0,0,6.800
630,996,11,4000.0,1,0.0,0,42.201
631,997,2,0.0,0,0.0,0,0.400
