In [2]:
import os
from urllib.parse import quote

import atoti as tt

Welcome to Atoti 0.8.10!

By using this community edition, you agree with the license available at https://docs.atoti.io/latest/eula.html.
Browse the official documentation at https://docs.atoti.io.
Join the community at https://www.atoti.io/register.

Atoti collects telemetry data, which is used to help understand how to improve the product.
If you don't wish to send usage data, you can request a trial license at https://www.atoti.io/evaluation-license-request.

You can hide this message by setting the `ATOTI_HIDE_EULA_MESSAGE` environment variable to True.


In [3]:
# Start the Atoti session: user content is stored under ".content", the server
# is bound to port 9092, and the JVM gets the -Xms1G / -Xmx10G heap options
# (initial / maximum heap size).
jvm_heap_flags = ["-Xms1G", "-Xmx10G"]
session = tt.Session(
    java_options=jvm_heap_flags,
    port=9092,
    user_content_storage=".content",
)

In [4]:
# Connection settings for the PostgreSQL warehouse.
# Every value can be overridden through an environment variable so that real
# credentials never have to be written into (and committed with) the notebook.
# The fallbacks are the local-development defaults this notebook was built with.
db_name = os.environ.get("OLYMPICS_DB_NAME", "olympicsOLAP")
db_user = os.environ.get("OLYMPICS_DB_USER", "postgres")
db_password = os.environ.get("OLYMPICS_DB_PASSWORD", "postgres")
db_host = os.environ.get("OLYMPICS_DB_HOST", "pgdb")
db_port = os.environ.get("OLYMPICS_DB_PORT", "5432")

# The credentials travel in the URL's query string, so they are percent-encoded:
# a raw '&', '=', '#' or '%' in the user name or password would otherwise be
# read as URL syntax and corrupt the connection parameters.
jdbc_url = (
    f"jdbc:postgresql://{db_host}:{db_port}/{db_name}"
    f"?user={quote(db_user, safe='')}&password={quote(db_password, safe='')}"
)

In [5]:
def _load_table(source_table, table_name, keys):
    """Load every row of one warehouse table into the Atoti session.

    Runs ``SELECT * FROM <source_table>`` through ``jdbc_url`` and registers
    the result in ``session`` under ``table_name`` with ``keys`` as its key
    columns. Returns the table object produced by ``session.read_sql``.
    """
    return session.read_sql(
        f"SELECT * FROM {source_table}",
        keys=keys,
        table_name=table_name,
        url=jdbc_url,
    )


# Dimension tables, each keyed by its own surrogate key column.
dimtime_table = _load_table("dimtime", "DimTime", ["timekey"])
dimgdp_table = _load_table("dimgdp", "DimGDP", ["gdpkey"])
dimpopulation_table = _load_table("dimpopulation", "DimPopulation", ["populationkey"])
dimlifeexpectancy_table = _load_table(
    "dimlifeexpectancy", "DimLifeExpectancy", ["lifeexpectancykey"]
)
dimeventattendtype_table = _load_table(
    "dimeventattendtype", "DimEventAttendType", ["attendtypekey"]
)

# Fact table, keyed by the combination of its dimension references and the
# medal type.
# NOTE(review): "timekey" is not part of this key; confirm it is fully
# determined by the other key columns, otherwise rows that differ only by
# timekey would presumably collide on load.
factolympicmedals_table = _load_table(
    "factolympicmedals",
    "FactOlympicMedals",
    [
        "countrykey",
        "eventattendtypekey",
        "eventcategorykey",
        "gdpkey",
        "hostkey",
        "lifeexpectancykey",
        "medaltype",
        "mentalillnesskey",
        "populationkey",
    ],
)

# Remaining dimension tables.
dimgamehost_table = _load_table("dimgamehost", "DimGameHost", ["gamehostkey"])
dimmentalillness_table = _load_table(
    "dimmentalillness", "DimMentalIllness", ["mentalillnesskey"]
)
dimcountry_table = _load_table("dimcountry", "DimCountry", ["countrykey"])
dimeventcategory_table = _load_table(
    "dimeventcategory", "DimEventCategory", ["eventcategorykey"]
)

In [6]:
# Join every dimension table onto the fact table (star schema).
# Each entry is (dimension table, fact-table column, dimension-table column);
# the two column names differ for the attend-type and game-host dimensions.
dimension_links = [
    (dimtime_table, "timekey", "timekey"),
    (dimgdp_table, "gdpkey", "gdpkey"),
    (dimpopulation_table, "populationkey", "populationkey"),
    (dimlifeexpectancy_table, "lifeexpectancykey", "lifeexpectancykey"),
    (dimeventattendtype_table, "eventattendtypekey", "attendtypekey"),
    (dimgamehost_table, "hostkey", "gamehostkey"),
    (dimmentalillness_table, "mentalillnesskey", "mentalillnesskey"),
    (dimcountry_table, "countrykey", "countrykey"),
    (dimeventcategory_table, "eventcategorykey", "eventcategorykey"),
]

for dimension_table, fact_column, dimension_column in dimension_links:
    factolympicmedals_table.join(
        dimension_table,
        factolympicmedals_table[fact_column] == dimension_table[dimension_column],
    )


In [7]:
# Render the schema of the session's tables and the joins between them.
session.tables.schema

```mermaid
erDiagram
  "DimGameHost" {
    _ int PK "gamehostkey"
    _ String "gamehostname"
    _ String "gamehostcity"
    _ String "gamehostseasontype"
    _ String "gamehostcountry"
  }
  "DimMentalIllness" {
    _ int PK "mentalillnesskey"
    _ String "mentalillnesspercent"
  }
  "DimEventAttendType" {
    _ int PK "attendtypekey"
    _ String "attendgender"
    _ String "attendmembertype"
  }
  "DimEventCategory" {
    _ String "eventdiscipline"
    _ String "eventname"
    _ int PK "eventcategorykey"
  }
  "DimGDP" {
    _ int PK "gdpkey"
    _ String "gdppercent"
  }
  "DimTime" {
    _ String PK "timekey"
    _ String "timeyear"
    _ String "timedecade"
  }
  "DimPopulation" {
    _ int PK "populationkey"
    _ String "populationpercent"
  }
  "DimCountry" {
    _ int PK "countrykey"
    _ String "countryalternativekey"
    _ String "countryname"
    _ String "countryregion"
  }
  "FactOlympicMedals" {
    _ int PK "countrykey"
    _ int PK "hostkey"
    _ String PK "medaltype"
    _ int PK "eventattendtypekey"
    _ int PK "eventcategorykey"
    _ String "timekey"
    _ int PK "populationkey"
    _ int PK "lifeexpectancykey"
    _ int PK "mentalillnesskey"
    _ int PK "gdpkey"
  }
  "DimLifeExpectancy" {
    _ int PK "lifeexpectancykey"
    _ String "lifeexpectancypercent"
  }
  "FactOlympicMedals" }o--o| "DimGameHost" : "`hostkey` == `gamehostkey`"
  "FactOlympicMedals" }o--o| "DimEventAttendType" : "`eventattendtypekey` == `attendtypekey`"
  "FactOlympicMedals" }o--o| "DimEventCategory" : "`eventcategorykey` == `eventcategorykey`"
  "FactOlympicMedals" }o--o| "DimGDP" : "`gdpkey` == `gdpkey`"
  "FactOlympicMedals" }o--o| "DimCountry" : "`countrykey` == `countrykey`"
  "FactOlympicMedals" }o--o| "DimTime" : "`timekey` == `timekey`"
  "FactOlympicMedals" }o--o| "DimPopulation" : "`populationkey` == `populationkey`"
  "FactOlympicMedals" }o--o| "DimLifeExpectancy" : "`lifeexpectancykey` == `lifeexpectancykey`"
  "FactOlympicMedals" }o--o| "DimMentalIllness" : "`mentalillnesskey` == `mentalillnesskey`"
```


In [8]:
# Build the cube on the fact table; the dimension tables are reached through
# the joins declared on it.
cube = session.create_cube(factolympicmedals_table)

In [9]:
# Inspect the cube that was just created.
cube

In [10]:
# Short aliases for the cube's three collections, used throughout the notebook.
hierarchies = cube.hierarchies
levels = cube.levels
measures = cube.measures

In [11]:
# Inspect the levels currently available on the cube.
levels

In [12]:
# Inspect the hierarchies currently defined on the cube.
hierarchies

In [13]:
# Define the analysis hierarchies. Each entry maps a hierarchy name to its
# level names, ordered from the coarsest level to the finest.
hierarchy_levels = {
    "Time": ["timedecade", "timeyear"],
    "Country": ["countryregion", "countryname"],
    "Event": ["eventdiscipline", "eventname"],
    "GDP": ["gdppercent"],
    "Population": ["populationpercent"],
    "Life Expectancy": ["lifeexpectancypercent"],
    "Mental Illness": ["mentalillnesspercent"],
    "Game Host": ["gamehostseasontype", "gamehostcountry", "gamehostcity"],
    "Attend Type": ["attendmembertype", "attendgender"],
}

# Levels are looked up right before each hierarchy is created, one hierarchy
# at a time, in the order listed above.
for hierarchy_name, level_names in hierarchy_levels.items():
    hierarchies[hierarchy_name] = [levels[level_name] for level_name in level_names]

In [14]:
# Check the hierarchies after adding the custom ones.
hierarchies

In [15]:
# Remove the redundant per-column hierarchies (presumably the ones generated
# automatically when the cube was created), so that only the hand-built
# hierarchies remain. Keys are (dimension name, hierarchy name) pairs, grouped
# here by dimension.
redundant_hierarchies = {
    "DimCountry": ["countryname", "countryregion", "countryalternativekey"],
    "DimEventAttendType": ["attendmembertype", "attendgender"],
    "DimEventCategory": ["eventname", "eventdiscipline"],
    "DimGDP": ["gdppercent"],
    "DimGameHost": [
        "gamehostcity",
        "gamehostcountry",
        "gamehostname",
        "gamehostseasontype",
    ],
    "DimLifeExpectancy": ["lifeexpectancypercent"],
    "DimMentalIllness": ["mentalillnesspercent"],
    "DimPopulation": ["populationpercent"],
    "DimTime": ["timedecade", "timeyear"],
    "FactOlympicMedals": [
        "countrykey",
        "eventattendtypekey",
        "eventcategorykey",
        "gdpkey",
        "hostkey",
        "lifeexpectancykey",
        "mentalillnesskey",
        "populationkey",
        "timekey",
        "medaltype",
    ],
}

for dimension_name, hierarchy_names in redundant_hierarchies.items():
    for hierarchy_name in hierarchy_names:
        del cube.hierarchies[(dimension_name, hierarchy_name)]

In [16]:
# Check which hierarchies remain after the deletions.
hierarchies

In [17]:
# Inspect the measures currently defined on the cube.
measures

In [18]:
# Remove the "contributors.COUNT" measure from the cube.
# NOTE(review): not idempotent; re-running this cell in the same session
# presumably fails because the measure no longer exists.
del measures["contributors.COUNT"]

In [19]:
# Inspect the levels left on the cube at this point.
levels

In [20]:
# Measure "Gold Medals Count": each fact row contributes 1 when its medal type
# is "GOLD" and 0 otherwise; the contributions are summed.
is_gold = factolympicmedals_table["medaltype"] == "GOLD"
measures["Gold Medals Count"] = tt.agg.sum(tt.where(is_gold, 1, 0))

In [21]:
# Measure "Medal Score": weighted medal tally summed over fact rows, with
# GOLD = 3 points, SILVER = 2, BRONZE = 1 and any other medal type = 0.
medal_type = factolympicmedals_table["medaltype"]

# Build the conditional from the innermost fallback outwards.
bronze_or_nothing = tt.where(medal_type == "BRONZE", 1, 0)
silver_or_lower = tt.where(medal_type == "SILVER", 2, bronze_or_nothing)
points_per_medal = tt.where(medal_type == "GOLD", 3, silver_or_lower)

measures["Medal Score"] = tt.agg.sum(points_per_medal)

In [22]:
# Inspect the measures after defining "Gold Medals Count" and "Medal Score".
measures

In [23]:
# Query 1: Medal Score per event discipline, restricted to the
# ("Oceania", "Australia") member of the Country hierarchy.
AU10result = cube.query(
    measures["Medal Score"],
    levels=[levels["countryname"], levels["eventdiscipline"]],
    filter=hierarchies["Country"].isin(("Oceania", "Australia")),
)

In [24]:
# Show the Query 1 result.
AU10result

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Medal Score
countryregion,countryname,eventdiscipline,Unnamed: 3_level_1
Oceania,Australia,Alpine Skiing,1
Oceania,Australia,Archery,5
Oceania,Australia,Athletics,145
Oceania,Australia,Baseball,2
Oceania,Australia,Basketball,9
Oceania,Australia,Beach Volleyball,6
Oceania,Australia,Boxing,6
Oceania,Australia,Canoe Marathon,1
Oceania,Australia,Canoe Slalom,12
Oceania,Australia,Canoe Sprint,32


In [25]:
# Query 2: Medal Score per year and game season type, restricted to the
# ("Oceania", "Australia") member of the Country hierarchy.
AUhisresult = cube.query(
    measures["Medal Score"],
    levels=[
        levels["countryname"],
        levels["timeyear"],
        levels["gamehostseasontype"],
    ],
    filter=hierarchies["Country"].isin(("Oceania", "Australia")),
)

In [26]:
# Show the Query 2 result.
AUhisresult

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Medal Score
countryregion,countryname,timedecade,timeyear,gamehostseasontype,Unnamed: 5_level_1
Oceania,Australia,1890,1896,Summer,6
Oceania,Australia,1900,1900,Summer,9
Oceania,Australia,1900,1904,Summer,7
Oceania,Australia,1920,1920,Summer,5
Oceania,Australia,1920,1924,Summer,13
Oceania,Australia,1920,1928,Summer,8
Oceania,Australia,1930,1932,Summer,12
Oceania,Australia,1930,1936,Summer,1
Oceania,Australia,1940,1948,Summer,23
Oceania,Australia,1950,1952,Summer,25


In [27]:
# Query 3: Medal Score per attendee gender and year, with no filter.
GENDERresult = cube.query(
    measures["Medal Score"],
    levels=[levels["attendgender"], levels["timeyear"]],
)

In [28]:
# Show the Query 3 result.
GENDERresult

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Medal Score
attendmembertype,attendgender,timedecade,timeyear,Unnamed: 4_level_1
Single,SingleMen,1890,1896,232
Single,SingleMen,1900,1900,332
Single,SingleMen,1900,1904,455
Single,SingleMen,1900,1908,418
Single,SingleMen,1910,1912,375
...,...,...,...,...
Team,TeamWomen,2010,2014,46
Team,TeamWomen,2010,2016,216
Team,TeamWomen,2010,2018,48
Team,TeamWomen,2020,2020,251


In [29]:
# Query 4: Gold Medals Count per value of the mentalillnesspercent level.
MENTALresult = cube.query(
    measures["Gold Medals Count"],
    levels=[levels["mentalillnesspercent"]],
)

In [30]:
# Show the Query 4 result.
MENTALresult

Unnamed: 0_level_0,Gold Medals Count
mentalillnesspercent,Unnamed: 1_level_1
0.0,1
10.0,471
100.0,682
20.0,87
30.0,290
40.0,57
50.0,82
60.0,103
70.0,77
80.0,265


In [31]:
# Query 5: Medal Score per event discipline for the "Europe" region.
europe_only = levels["countryregion"] == "Europe"
EU10result = cube.query(
    measures["Medal Score"],
    levels=[levels["countryregion"], levels["eventdiscipline"]],
    filter=europe_only,
)

In [32]:
# Show the Query 5 result.
EU10result

Unnamed: 0_level_0,Unnamed: 1_level_0,Medal Score
countryregion,eventdiscipline,Unnamed: 2_level_1
Europe,3x3 Basketball,8
Europe,Alpine Skiing,860
Europe,Archery,186
Europe,Artistic Gymnastics,30
Europe,Artistic Swimming,8
Europe,...,...
Europe,Volleyball,83
Europe,Water Motorsports,9
Europe,Water Polo,166
Europe,Weightlifting,686


In [33]:
# Query 6: Gold Medals Count per value of the gdppercent level.
GDPresult = cube.query(
    measures["Gold Medals Count"],
    levels=[levels["gdppercent"]],
)

In [34]:
# Show the Query 6 result.
GDPresult

Unnamed: 0_level_0,Gold Medals Count
gdppercent,Unnamed: 1_level_1
10.0,1
100.0,2953
20.0,4
30.0,12
40.0,22
50.0,42
60.0,96
70.0,195
80.0,289
90.0,489


In [None]:
# Query 7: Gold Medals Count per year, restricted to the
# ("Oceania", "Australia") member of the Country hierarchy.
#
# The time level is `timeyear`, not `timedecade`: the result saved with this
# notebook is broken down by year, while this cell had been edited to ask for
# `timedecade` without being re-executed, so the code and the displayed output
# disagreed. Querying `timeyear` still returns its parent `timedecade` level as
# well, as the Query 2 result shows.
AUhisGOLDresult = cube.query(
    measures["Gold Medals Count"],
    levels=[levels["countryname"], levels["timeyear"]],
    filter=hierarchies["Country"].isin(("Oceania", "Australia")),
)

In [37]:
# Show the Query 7 result.
AUhisGOLDresult

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Gold Medals Count
countryregion,countryname,timedecade,timeyear,Unnamed: 4_level_1
Oceania,Australia,1890,1896,2
Oceania,Australia,1900,1900,2
Oceania,Australia,1900,1904,0
Oceania,Australia,1920,1920,0
Oceania,Australia,1920,1924,3
Oceania,Australia,1920,1928,1
Oceania,Australia,1930,1932,3
Oceania,Australia,1930,1936,0
Oceania,Australia,1940,1948,2
Oceania,Australia,1950,1952,6


In [38]:
# Query 8: Gold Medals Count per attendee gender for Australia and New Zealand,
# selected as two member paths of the Country hierarchy.
australia_and_new_zealand = hierarchies["Country"].isin(
    ("Oceania", "Australia"),
    ("Oceania", "New Zealand"),
)
AUNZgendercompare = cube.query(
    measures["Gold Medals Count"],
    levels=[levels["countryname"], levels["attendgender"]],
    filter=australia_and_new_zealand,
)

In [39]:
# Show the Query 8 result.
AUNZgendercompare

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Gold Medals Count
countryregion,countryname,attendmembertype,attendgender,Unnamed: 4_level_1
Oceania,Australia,Single,SingleMen,62
Oceania,Australia,Single,SingleOpen,1
Oceania,Australia,Single,SingleWomen,54
Oceania,Australia,Team,TeamMen,27
Oceania,Australia,Team,TeamMixed,0
Oceania,Australia,Team,TeamOpen,6
Oceania,Australia,Team,TeamWomen,20
Oceania,New Zealand,Single,SingleMen,18
Oceania,New Zealand,Single,SingleOpen,5
Oceania,New Zealand,Single,SingleWomen,11


In [45]:
# Query 9: Medal Score per country, restricted to the "Oceania" member of the
# Country hierarchy (a one-element member path).
OceaniaResult = cube.query(
    measures["Medal Score"],
    levels=[levels["countryname"]],
    filter=hierarchies["Country"].isin(("Oceania",)),
)

In [46]:
# Show the Query 9 result.
OceaniaResult

Unnamed: 0_level_0,Unnamed: 1_level_0,Medal Score
countryregion,countryname,Unnamed: 2_level_1
Oceania,Australia,1086
Oceania,Fiji,7
Oceania,New Zealand,288
Oceania,Samoa,2
Oceania,Tonga,2


In [50]:
# Query 10: Gold Medals Count per value of the populationpercent level.
POPresult = cube.query(
    measures["Gold Medals Count"],
    levels=[levels["populationpercent"]],
)

In [51]:
# Show the Query 10 result.
POPresult

Unnamed: 0_level_0,Gold Medals Count
populationpercent,Unnamed: 1_level_1
10.0,1
100.0,1685
20.0,4
30.0,52
40.0,115
50.0,173
60.0,237
70.0,171
80.0,228
90.0,397


In [52]:
# Query 11: Gold Medals Count per value of the lifeexpectancypercent level.
LIFEresult = cube.query(
    measures["Gold Medals Count"],
    levels=[levels["lifeexpectancypercent"]],
)

In [53]:
# Show the Query 11 result.
LIFEresult

Unnamed: 0_level_0,Gold Medals Count
lifeexpectancypercent,Unnamed: 1_level_1
0.0,0
10.0,18
100.0,1672
20.0,46
30.0,28
40.0,208
50.0,208
60.0,196
70.0,355
80.0,356


In [57]:
# Query 12: Medal Score per decade and country region, with no filter.
RegionDCD = cube.query(
    measures["Medal Score"],
    levels=[levels["timedecade"], levels["countryregion"]],
)

In [58]:
# Show the Query 12 result.
RegionDCD

Unnamed: 0_level_0,Unnamed: 1_level_0,Medal Score
timedecade,countryregion,Unnamed: 2_level_1
1890,Europe,190
1890,North America,47
1890,Oceania,6
1900,Africa,5
1900,Asia,4
...,...,...
2020,Asia,619
2020,Europe,1384
2020,North America,444
2020,Oceania,147


In [59]:
# Query 13: Gold Medals Count per country for the "Europe" region.
europe_only = levels["countryregion"] == "Europe"
EUbycountryresult = cube.query(
    measures["Gold Medals Count"],
    levels=[levels["countryname"]],
    filter=europe_only,
)

In [60]:
# Show the Query 13 result.
EUbycountryresult

Unnamed: 0_level_0,Unnamed: 1_level_0,Gold Medals Count
countryregion,countryname,Unnamed: 2_level_1
Europe,Austria,90
Europe,Belarus,21
Europe,Belgium,45
Europe,Bohemia,0
Europe,Bulgaria,55
Europe,Croatia,18
Europe,Cyprus,0
Europe,Czech Republic,29
Europe,Czechoslovakia,50
Europe,Denmark,48


In [61]:
# Query 14: Gold Medals Count per decade for the "Europe" region.
europe_only = levels["countryregion"] == "Europe"
EUbydecaderesult = cube.query(
    measures["Gold Medals Count"],
    levels=[levels["timedecade"]],
    filter=europe_only,
)

In [62]:
# Show the Query 14 result.
EUbydecaderesult

Unnamed: 0_level_0,Gold Medals Count
timedecade,Unnamed: 1_level_1
1890,29
1900,157
1910,69
1920,278
1930,166
1940,95
1950,213
1960,376
1970,340
1980,544


In [72]:
# Query 15: Gold Medals Count per attendee gender and country region, limited
# to the "Europe" and "North America" members of the Country hierarchy.
europe_and_north_america = hierarchies["Country"].isin(
    ("Europe",),
    ("North America",),
)
EUNAGender = cube.query(
    measures["Gold Medals Count"],
    levels=[levels["attendgender"], levels["countryregion"]],
    filter=europe_and_north_america,
)

In [73]:
# Show the Query 15 result.
EUNAGender

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Gold Medals Count
attendmembertype,attendgender,countryregion,Unnamed: 3_level_1
Single,SingleMen,Europe,1892
Single,SingleMen,North America,769
Single,SingleOpen,Europe,85
Single,SingleOpen,North America,11
Single,SingleWomen,Europe,820
Single,SingleWomen,North America,309
Team,TeamMen,Europe,637
Team,TeamMen,North America,214
Team,TeamMixed,Europe,55
Team,TeamMixed,North America,11
