In [1]:
import os
import json
import pandas as pd
from datetime import datetime
import pandasql as ps

In [2]:
pd.set_option('display.float_format', lambda x: '%.0f' % x)

In [3]:
CURRENT_WORKING_DIRECTORY = os.getcwd()
INPUTS = CURRENT_WORKING_DIRECTORY + "\\inputs"
SEPARATOR=";"
PLAYERS = "Players.csv"
SELFBANS = "Selfbans.csv"
TRANSACTIONS = "Transactions.csv"

In [4]:
CONFIG_FILE = "config.json"
with open(CURRENT_WORKING_DIRECTORY + "\\" + CONFIG_FILE, encoding='utf-8') as f:
    CONFIG = json.load(f)

In [5]:
PLAYERS_DATES = CONFIG["PLAYERS"]["DATES"]
PLAYERS_SEPARATOR = CONFIG["PLAYERS"]["SEPARATOR"]
PLAYERS_DTYPE = CONFIG["PLAYERS"]["DTYPE"] 

In [6]:
SELFBANS_DATES = CONFIG["SELFBANS"]["DATES"]
SELFBANS_SEPARATOR = CONFIG["SELFBANS"]["SEPARATOR"]
SELFBANS_DTYPE = CONFIG["SELFBANS"]["DTYPE"] 

In [7]:
TRANSACTIONS_DATES = CONFIG["TRANSACTIONS"]["DATES"]
TRANSACTIONS_SEPARATOR = CONFIG["TRANSACTIONS"]["SEPARATOR"]
TRANSACTIONS_DTYPE = CONFIG["TRANSACTIONS"]["DTYPE"] 

In [8]:
def myparser(x):
    """
    """
#     try:
#         datetime.utcfromtimestamp(int(x))
#     except:
#         return pd.NaT
#    return dt.datetime.strptime(x, '%Y-%m-%d %H:%M:%S')
    return datetime.utcfromtimestamp(int(x))

## PLAYERS

In [9]:
players = pd.read_csv(
    INPUTS + "\\" + PLAYERS,
    delimiter=PLAYERS_SEPARATOR,
    parse_dates=PLAYERS_DATES,
    dtype=PLAYERS_DTYPE,
    date_parser=myparser
)

In [10]:
players.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 29 entries, 0 to 28
Data columns (total 10 columns):
 #   Column             Non-Null Count  Dtype         
---  ------             --------------  -----         
 0   AccountId          29 non-null     object        
 1   PlayerNr           29 non-null     object        
 2   Country            29 non-null     object        
 3   DateOfBirth        29 non-null     datetime64[ns]
 4   Source             29 non-null     object        
 5   Selfban_start      29 non-null     datetime64[ns]
 6   Selfban_end        29 non-null     datetime64[ns]
 7   First_genre        29 non-null     object        
 8   Registration_date  29 non-null     datetime64[ns]
 9   Currency           29 non-null     object        
dtypes: datetime64[ns](4), object(6)
memory usage: 2.4+ KB


In [22]:
players.head(5)

Unnamed: 0,AccountId,PlayerNr,Country,DateOfBirth,Source,Selfban_start,Selfban_end,First_genre,Registration_date,Currency
0,1,101,PL,1988-02-07 00:31:26,Facebook,2022-02-06 23:17:49,2022-02-08 03:04:29,arcade,2022-02-06 23:17:49,PLN
1,2,102,UK,1988-02-08 04:18:06,direct,2022-02-06 06:37:49,2022-02-07 10:24:29,arcade,2021-12-22 16:11:09,GBP
2,3,103,FR,1988-02-09 08:04:46,web,2022-02-05 13:57:49,2022-02-06 17:44:29,arcade,2021-11-06 09:04:29,EUR
3,4,104,DE,1988-02-10 11:51:26,Facebook,2022-02-04 21:17:49,2022-02-06 01:04:29,arcade,2021-09-21 01:57:49,PLN
4,5,105,DK,1988-02-11 15:38:06,direct,2022-02-04 04:37:49,2022-02-05 08:24:29,arcade,2021-08-05 18:51:09,GBP


In [42]:
players["AccountId"].unique()

array(['1', '2', '3', '4', '5', '6', '7', '8', '9', '10', '11', '12',
       '13', '14', '15', '16', '17', '18', '19', '20', '21', '22', '23',
       '24', '25', '26', '27', '28', '29'], dtype=object)

In [41]:
players["AccountId"].nunique()

29

In [44]:
players["PlayerNr"].unique()

array(['101', '102', '103', '104', '105', '106', '107', '108', '109',
       '110', '111', '112', '113', '114', '115', '116', '117', '118',
       '119', '120', '121', '122', '123', '124', '125', '126', '127',
       '128', '129'], dtype=object)

In [45]:
players["PlayerNr"].nunique()

29

In [63]:
players["Source"].unique()

array(['Facebook', 'direct', 'web'], dtype=object)

In [64]:
players["Source"].nunique()

3

In [61]:
players["First_genre"].unique()

array(['arcade'], dtype=object)

In [62]:
players["First_genre"].nunique()

1

In [65]:
players["Currency"].unique()

array(['PLN', 'GBP', 'EUR'], dtype=object)

In [66]:
players["Currency"].nunique()

3

## SELFBANS

In [12]:
selfbans = pd.read_csv(
    INPUTS + "\\" + SELFBANS,
    delimiter=SELFBANS_SEPARATOR,
    dtype=SELFBANS_DTYPE,
    parse_dates=SELFBANS_DATES,
    date_parser=myparser
)

In [13]:
selfbans.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 58 entries, 0 to 57
Data columns (total 3 columns):
 #   Column         Non-Null Count  Dtype         
---  ------         --------------  -----         
 0   AccountId      58 non-null     object        
 1   Selfban_start  58 non-null     datetime64[ns]
 2   Selfban_end    58 non-null     datetime64[ns]
dtypes: datetime64[ns](2), object(1)
memory usage: 1.5+ KB


In [23]:
selfbans.head()

Unnamed: 0,AccountId,Selfban_start,Selfban_end
0,1,2022-02-06 23:17:49,2022-02-08 03:04:29
1,2,2022-02-06 06:37:49,2022-02-07 10:24:29
2,3,2022-02-05 13:57:49,2022-02-06 17:44:29
3,4,2022-02-04 21:17:49,2022-02-06 01:04:29
4,5,2022-02-04 04:37:49,2022-02-05 08:24:29


In [46]:
selfbans["AccountId"].unique()

array(['1', '2', '3', '4', '5', '6', '7', '8', '9', '10', '11', '12',
       '13', '14', '15', '16', '17', '18', '19', '20', '21', '22', '23',
       '24', '25', '26', '27', '28', '29'], dtype=object)

In [47]:
selfbans["AccountId"].nunique()

29

In [49]:
selfbans["Selfban_start"].unique()

array(['2022-02-06T23:17:49.000000000', '2022-02-06T06:37:49.000000000',
       '2022-02-05T13:57:49.000000000', '2022-02-04T21:17:49.000000000',
       '2022-02-04T04:37:49.000000000', '2022-02-03T11:57:49.000000000',
       '2022-02-02T19:17:49.000000000', '2022-02-02T02:37:49.000000000',
       '2022-02-01T09:57:49.000000000', '1970-01-01T00:00:00.000000000',
       '2022-01-28T05:57:49.000000000', '2022-01-27T13:17:49.000000000',
       '2022-01-26T20:37:49.000000000', '2022-01-26T03:57:49.000000000',
       '2022-01-23T09:17:49.000000000', '2022-01-22T16:37:49.000000000',
       '2022-01-21T23:57:49.000000000', '2022-01-21T07:17:49.000000000',
       '2022-01-20T14:37:49.000000000', '2022-01-19T21:57:49.000000000',
       '2022-01-19T05:17:49.000000000', '2022-01-18T12:37:49.000000000',
       '2022-01-11T13:57:49.000000000', '2022-01-10T21:17:49.000000000',
       '2022-01-10T04:37:49.000000000', '2022-01-09T11:57:49.000000000',
       '2022-01-08T19:17:49.000000000', '2022-01-05

In [50]:
selfbans["Selfban_start"].nunique()

30

In [51]:
selfbans["Selfban_end"].unique()

array(['2022-02-08T03:04:29.000000000', '2022-02-07T10:24:29.000000000',
       '2022-02-06T17:44:29.000000000', '2022-02-06T01:04:29.000000000',
       '2022-02-05T08:24:29.000000000', '2022-02-04T15:44:29.000000000',
       '2022-02-03T23:04:29.000000000', '2022-02-03T06:24:29.000000000',
       '2022-02-02T13:44:29.000000000', '1970-01-01T00:00:00.000000000',
       '2022-01-29T09:44:29.000000000', '2022-01-28T17:04:29.000000000',
       '2022-01-28T00:24:29.000000000', '2022-01-27T07:44:29.000000000',
       '2022-01-24T13:04:29.000000000', '2022-01-23T20:24:29.000000000',
       '2022-01-23T03:44:29.000000000', '2022-01-22T11:04:29.000000000',
       '2022-01-21T18:24:29.000000000', '2022-01-21T01:44:29.000000000',
       '2022-01-20T09:04:29.000000000', '2022-01-19T16:24:29.000000000',
       '2022-01-12T17:44:29.000000000', '2022-01-12T01:04:29.000000000',
       '2022-01-11T08:24:29.000000000', '2022-01-10T15:44:29.000000000',
       '2022-01-09T23:04:29.000000000', '2022-01-06

In [52]:
selfbans["Selfban_end"].nunique()

30

## TRANSACTIONS

In [15]:
transactions = pd.read_csv(
    INPUTS + "\\" + TRANSACTIONS,
    delimiter=TRANSACTIONS_SEPARATOR,
    dtype=TRANSACTIONS_DTYPE,
    parse_dates=TRANSACTIONS_DATES,
    date_parser=myparser
)

In [16]:
transactions.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 29 entries, 0 to 28
Data columns (total 5 columns):
 #   Column          Non-Null Count  Dtype         
---  ------          --------------  -----         
 0   PlayerNr        29 non-null     object        
 1   Trx_Type        29 non-null     object        
 2   Trx_time        29 non-null     datetime64[ns]
 3   Balance_before  29 non-null     float64       
 4   Balance_after   29 non-null     float64       
dtypes: datetime64[ns](1), float64(2), object(2)
memory usage: 1.3+ KB


In [24]:
transactions.head(3)

Unnamed: 0,PlayerNr,Trx_Type,Trx_time,Balance_before,Balance_after
0,101,win,2022-02-08 03:06:40,111,888
1,102,win,2022-02-08 03:06:40,121,500
2,103,win,2022-02-08 03:06:40,131,112


In [53]:
transactions["PlayerNr"].unique()

array(['101', '102', '103', '104', '105', '106', '107', '108', '109',
       '110', '111', '112', '113', '114', '115', '116', '117', '118',
       '119', '120', '121', '122', '123', '124', '125', '126', '127',
       '128', '129'], dtype=object)

In [56]:
transactions["PlayerNr"].nunique()

29

In [57]:
transactions["Trx_Type"].unique()

array(['win', 'deposit', 'withdrawal', 'stake'], dtype=object)

In [58]:
transactions["Trx_Type"].nunique()

4

In [59]:
transactions["Trx_time"].unique()

array(['2022-02-08T03:06:40.000000000'], dtype='datetime64[ns]')

In [60]:
transactions["Trx_time"].nunique()

1

# SQL

In [None]:
query01 = """
SELECT COUNT(*)
FROM players
LIMIT 10
"""

In [19]:
ps.sqldf(query01, locals())

Unnamed: 0,COUNT(*)
0,29


In [37]:
query02 = """
SELECT *
FROM players, selfbans
WHERE players.AccountId = selfbans.AccountId
AND players.Selfban_start >= selfbans.Selfban_start AND players.Selfban_start <= selfbans.Selfban_start
LIMIT 5
"""

In [38]:
ps.sqldf(query02, locals())

Unnamed: 0,AccountId,PlayerNr,Country,DateOfBirth,Source,Selfban_start,Selfban_end,First_genre,Registration_date,Currency,AccountId.1,Selfban_start.1,Selfban_end.1
0,1,101,PL,1988-02-07 00:31:26.000000,Facebook,2022-02-06 23:17:49.000000,2022-02-08 03:04:29.000000,arcade,2022-02-06 23:17:49.000000,PLN,1,2022-02-06 23:17:49.000000,2022-02-08 03:04:29.000000
1,2,102,UK,1988-02-08 04:18:06.000000,direct,2022-02-06 06:37:49.000000,2022-02-07 10:24:29.000000,arcade,2021-12-22 16:11:09.000000,GBP,2,2022-02-06 06:37:49.000000,2022-02-07 10:24:29.000000
2,3,103,FR,1988-02-09 08:04:46.000000,web,2022-02-05 13:57:49.000000,2022-02-06 17:44:29.000000,arcade,2021-11-06 09:04:29.000000,EUR,3,2022-02-05 13:57:49.000000,2022-02-06 17:44:29.000000
3,4,104,DE,1988-02-10 11:51:26.000000,Facebook,2022-02-04 21:17:49.000000,2022-02-06 01:04:29.000000,arcade,2021-09-21 01:57:49.000000,PLN,4,2022-02-04 21:17:49.000000,2022-02-06 01:04:29.000000
4,5,105,DK,1988-02-11 15:38:06.000000,direct,2022-02-04 04:37:49.000000,2022-02-05 08:24:29.000000,arcade,2021-08-05 18:51:09.000000,GBP,5,2022-02-04 04:37:49.000000,2022-02-05 08:24:29.000000


In [70]:
query03 = """
SELECT
*

FROM players

WHERE strftime('%Y', DateOfBirth) = '1988'
--WHERE date(DateOfBirth) < date('2019-03-19')
LIMIT 5
"""

In [71]:
ps.sqldf(query03, locals())

Unnamed: 0,AccountId,PlayerNr,Country,DateOfBirth,Source,Selfban_start,Selfban_end,First_genre,Registration_date,Currency
0,1,101,PL,1988-02-07 00:31:26.000000,Facebook,2022-02-06 23:17:49.000000,2022-02-08 03:04:29.000000,arcade,2022-02-06 23:17:49.000000,PLN
1,2,102,UK,1988-02-08 04:18:06.000000,direct,2022-02-06 06:37:49.000000,2022-02-07 10:24:29.000000,arcade,2021-12-22 16:11:09.000000,GBP
2,3,103,FR,1988-02-09 08:04:46.000000,web,2022-02-05 13:57:49.000000,2022-02-06 17:44:29.000000,arcade,2021-11-06 09:04:29.000000,EUR
3,4,104,DE,1988-02-10 11:51:26.000000,Facebook,2022-02-04 21:17:49.000000,2022-02-06 01:04:29.000000,arcade,2021-09-21 01:57:49.000000,PLN
4,5,105,DK,1988-02-11 15:38:06.000000,direct,2022-02-04 04:37:49.000000,2022-02-05 08:24:29.000000,arcade,2021-08-05 18:51:09.000000,GBP


In [76]:
query04 = """
SELECT
AVG(CAST((JulianDay(p.Selfban_end) - JulianDay(p.Selfban_start)) * 24 * 60 * 60 As Integer)) as time_in_sec

FROM players as p

WHERE strftime('%Y', p.DateOfBirth) = '1988'
--WHERE date(DateOfBirth) < date('2019-03-19')
LIMIT 5
"""

In [77]:
ps.sqldf(query04, locals())

Unnamed: 0,time_in_sec
0,72413
