# Country Data for Cassandra

Notebook, um Fake-Daten nach Cassandra zu laden.

In [None]:
# Install the Cassandra driver only if it is not already part of the image.
# Uncomment the next line to do so; %pip installs into the running kernel's environment.
#%pip install cassandra-driver

In [None]:
import json
import os

from cassandra.auth import PlainTextAuthProvider
from cassandra.cluster import Cluster

### Configure connection to Cassandra

In [20]:
# Connection settings. Each value can be overridden through an environment
# variable, so real credentials never have to be edited into the notebook.
# The fallbacks are the training-cluster values that were previously
# hard-coded in this cell; set the variables for any other cluster.
CASSANDRA_SERVER = os.environ.get("CASSANDRA_SERVER", "cassandra.nosql.svc.cluster.local")
CASSANDRA_PORT = int(os.environ.get("CASSANDRA_PORT", "9042"))
CASSANDRA_USERNAME = os.environ.get("CASSANDRA_USERNAME", "trainadm")
CASSANDRA_PASSWORD = os.environ.get("CASSANDRA_PASSWORD", "train@thinkport")

auth_provider = PlainTextAuthProvider(
    username=CASSANDRA_USERNAME,
    password=CASSANDRA_PASSWORD,
)

# `cluster` and `session` are used by the cells below.
cluster = Cluster([CASSANDRA_SERVER], port=CASSANDRA_PORT, auth_provider=auth_provider)
session = cluster.connect()

In [1]:
### Generate some master data ("Stammdaten") for each country

In [17]:
# Demo master data: one dict per country.
# Every record has the same seven scalar fields (id, name, code, population and
# three percentage figures). Spain, Germany and France additionally carry an
# "economic_indicators" mapping whose leaf values are all strings.
# NOTE(review): the "goverment_*" keys are spelled this way in the data itself;
# renaming them would change what gets stored, so they are left untouched.
country_data = [
    {
        "id": 1, "name": "USA", "code": "US",
        "population": 329484123,
        "pct_under_20": 24, "pct_urban": 83, "pct_working_age": 64,
    },
    {
        "id": 2, "name": "Brazil", "code": "BR",
        "population": 212559409,
        "pct_under_20": 28, "pct_urban": 88, "pct_working_age": 69,
    },
    {
        "id": 3, "name": "Spain", "code": "ES",
        "population": 47351567,
        "pct_under_20": 19, "pct_urban": 80, "pct_working_age": 66,
        "economic_indicators": {
            "inflation_rate":       {"value": "3.0", "unit": "%"},
            "gdp_per_capita":       {"value": "23450", "unit": "€"},
            "goverment_gross_debt": {"value": "118.7", "unit": "%"},
            "unemployment_rate":    {"value": "14.8", "unit": "%"},
            # Only this entry carries an explicit reference year.
            "goverment_deficit":    {"value": "-6.9", "unit": "%", "year": "2021"},
        },
    },
    {
        "id": 4, "name": "Germany", "code": "DE",
        "population": 83240525,
        "pct_under_20": 18, "pct_urban": 76, "pct_working_age": 64,
        "economic_indicators": {
            "inflation_rate":       {"value": "3.2", "unit": "%"},
            "gdp_per_capita":       {"value": "35480", "unit": "€"},
            "goverment_gross_debt": {"value": "69.3", "unit": "%"},
            "unemployment_rate":    {"value": "3.7", "unit": "%"},
        },
    },
    {
        "id": 5, "name": "United Kingdom", "code": "UK",
        "population": 67215293,
        "pct_under_20": 23, "pct_urban": 83, "pct_working_age": 63,
    },
    {
        "id": 6, "name": "India", "code": "IN",
        "population": 1380004385,
        "pct_under_20": 34, "pct_urban": 35, "pct_working_age": 69,
    },
    {
        "id": 7, "name": "France", "code": "FR",
        "population": 67391582,
        "pct_under_20": 23, "pct_urban": 82, "pct_working_age": 61,
        "economic_indicators": {
            "inflation_rate":       {"value": "2.1", "unit": "%"},
            "gdp_per_capita":       {"value": "32530", "unit": "€"},
            "goverment_gross_debt": {"value": "112.9", "unit": "%"},
            "unemployment_rate":    {"value": "7.9", "unit": "%"},
        },
    },
]

### 1) Create Keyspace

In [21]:
# Create the "countries" keyspace. The statement uses IF NOT EXISTS, so this
# cell can be re-run without failing once the keyspace is there.
create_keyspace_cql = """
    CREATE KEYSPACE IF NOT EXISTS countries 
    WITH REPLICATION = { 'class' : 'SimpleStrategy', 'replication_factor' : 1 }
    """
session.execute(create_keyspace_cql)

<cassandra.cluster.ResultSet at 0x7f5c2d1d7af0>

### 2) Create Table

In [23]:
# Create the country_population table (keyed by id) inside the countries
# keyspace. economic_indicators is a frozen two-level map of text values;
# IF NOT EXISTS keeps the cell safe to re-run.
create_table_cql = """
    CREATE TABLE IF NOT EXISTS countries.country_population (id int PRIMARY KEY, name text, code text, population bigint, pct_under_20 int, pct_urban int, pct_working_age int, economic_indicators frozen<map<text,map<text,text>>>);
    """
session.execute(create_table_cql)

<cassandra.cluster.ResultSet at 0x7f5c2c1563a0>

### 3) Insert data as JSON

In [None]:
# Load every country as one JSON document.
# The document is handed to the driver as a bound parameter instead of being
# spliced into the statement text: the driver quotes and escapes it, so a
# single quote inside any value can no longer break or alter the CQL.
insert_json_cql = "INSERT INTO countries.country_population JSON %s"

for country in country_data:
    country_json = json.dumps(country)
    # Show the payload being written, for quick visual verification.
    print(country_json)
    session.execute(insert_json_cql, (country_json,))

### 4) Alternative insert data relational

In [25]:
# Column-by-column variant of the load, using bound parameters.
# Only the seven scalar columns are written here; economic_indicators is not
# part of this statement.
insert_row_cql = """
        INSERT INTO countries.country_population (
            id,
            name,
            code,
            population,
            pct_under_20,
            pct_urban,
            pct_working_age
        ) VALUES (%s, %s, %s, %s, %s, %s, %s)
        """

# Record keys in the same order as the columns / placeholders above.
row_fields = (
    "id",
    "name",
    "code",
    "population",
    "pct_under_20",
    "pct_urban",
    "pct_working_age",
)

for country in country_data:
    row_values = tuple(country[field] for field in row_fields)
    session.execute(insert_row_cql, row_values)

In [27]:
# Shut down the Cluster object opened in the connection cell. Run this last:
# NOTE(review): per the driver docs the cluster (and its session) should not
# be used after shutdown — re-run the connection cell to continue working.
cluster.shutdown()