# Stammdaten für Cassandra erzeugen

Erzeuge exemplarische semi-strukturierte Daten für Cassandra

In [1]:
from cassandra.cluster import Cluster
from cassandra.auth import PlainTextAuthProvider
import json

### Configure connection to Cassandra

In [2]:
auth_provider = PlainTextAuthProvider(username='trainadm', password='train@thinkport')

CASSANDRA_SERVER = "cassandra.nosql.svc.cluster.local"
cluster = Cluster([CASSANDRA_SERVER], port=9042, auth_provider=auth_provider)
session = cluster.connect()

In [3]:
### Generate some Stammdaten for each Country

In [9]:
country_data = [
  {
    "id": 1,
    "name": "USA",
    "code": "US",
    "population": 329484123,
    "under_20": 24,
    "urban": 83,
    "working_age": 64
  },
  {
    "id": 2,
    "name": "Brazil",
    "code": "BR",
    "population": 212559409,
    "under_20": 28,
    "urban": 88,
    "working_age": 69
  },
  {
    "id": 3,
    "name": "Spain",
    "code": "ES",
    "population": 47351567,
    "under_20": 19,
    "urban": 80,
    "working_age": 66,
    "economic_indicators": {
        "inflation_rate": {"value": "3.0", "unit": "%"},
        "gdp_per_capita": {"value": "23450", "unit": "€"},
        "goverment_gross_debt": {"value": "118.7", "unit": "%"},
        "unemployment_rate": {"value": "14.8", "unit": "%"},
        "goverment_deficit": {"value": "-6.9", "unit": "%", "year": "2021"}
    }
  },
  {
    "id": 4,
    "name": "Germany",
    "code": "DE",
    "population": 83240525,
    "under_20": 18,
    "urban": 76,
    "working_age": 64,
    "economic_indicators": {
        "inflation_rate": {"value": "3.2", "unit": "%"},
        "gdp_per_capita": {"value": "35480", "unit": "€"},
        "goverment_gross_debt": {"value": "69.3", "unit": "%"},
        "unemployment_rate": {"value": "3.7", "unit": "%"}
    }
  },
  {
    "id": 5,
    "name": "United Kingdom",
    "code": "UK",
    "population": 67215293,
    "under_20": 23,
    "urban": 83,
    "working_age": 63
  },
  {
    "id": 6,
    "name": "India",
    "code": "IN",
    "population": 1380004385,
    "under_20": 34,
    "urban": 35,
    "working_age": 69
  },
  {
    "id": 7,
    "name": "France",
    "code": "FR",
    "population": 67391582,
    "under_20": 23,
    "urban": 82,
    "working_age": 61,
    "economic_indicators": {
        "inflation_rate": {"value": "2.1", "unit": "%"},
        "gdp_per_capita": {"value": "32530", "unit": "€"},
        "goverment_gross_debt": {"value": "112.9", "unit": "%"},
        "unemployment_rate": {"value": "7.9", "unit": "%"}
    }
  }
]

### 1) Create Keyspace

In [5]:
# nur nötig wenn der Keyspace noch nicht über cqlsh ausgeführt wurde
session.execute(
    """
    CREATE KEYSPACE IF NOT EXISTS countries 
    WITH REPLICATION = { 'class' : 'SimpleStrategy', 'replication_factor' : 1 }
    """
)

<cassandra.cluster.ResultSet at 0x7f12a947a2b0>

### 2) Create Table

In [6]:
# nur nötig wenn die Tabelle noch nicht über cqlsh ausgeführt wurde

session.execute(
    """
    
    CREATE TABLE IF NOT EXISTS countries.population (
    id int,
    name text,
    code text,
    population bigint,
    under_20 int,
    urban int,
    working_age int,
    economic_indicators frozen<map<text,map<text,text>>>,
    PRIMARY KEY ((name),population)
    );
    """
)

<cassandra.cluster.ResultSet at 0x7f12d4c0e490>

### 3) Insert data as JSON

In [10]:
for country in country_data:
    country_json=json.dumps(country)
    #print(country)
    sql=f"""
        INSERT INTO countries.population JSON '{country_json}' 
        """
    print(sql)
    session.execute(sql)


        INSERT INTO countries.population JSON '{"id": 1, "name": "USA", "code": "US", "population": 329484123, "under_20": 24, "urban": 83, "working_age": 64}' 
        

        INSERT INTO countries.population JSON '{"id": 2, "name": "Brazil", "code": "BR", "population": 212559409, "under_20": 28, "urban": 88, "working_age": 69}' 
        

        INSERT INTO countries.population JSON '{"id": 3, "name": "Spain", "code": "ES", "population": 47351567, "under_20": 19, "urban": 80, "working_age": 66, "economic_indicators": {"inflation_rate": {"value": "3.0", "unit": "%"}, "gdp_per_capita": {"value": "23450", "unit": "\u20ac"}, "goverment_gross_debt": {"value": "118.7", "unit": "%"}, "unemployment_rate": {"value": "14.8", "unit": "%"}, "goverment_deficit": {"value": "-6.9", "unit": "%", "year": "2021"}}}' 
        

        INSERT INTO countries.population JSON '{"id": 4, "name": "Germany", "code": "DE", "population": 83240525, "under_20": 18, "urban": 76, "working_age": 64, "economic_i

In [None]:
# connection wieder schließen
cluster.shutdown()