# Structure de la table des stations au départ

```CQL
-- Keyspace for the station data: SimpleStrategy replication with a replication factor of 2.
CREATE KEYSPACE velib_db WITH replication = {'class': 'SimpleStrategy', 'replication_factor': '2'}  AND durable_writes = true;

-- Initial layout of the stations table, before any static column is added.
-- `number` is the partition key and `moment` the clustering column, so each
-- `number` value owns one partition holding one row per `moment`.
-- NOTE(review): `bikes` / `spaces` presumably hold the available bikes and free
-- slots observed at `moment` -- confirm against the loader that fills this table.
CREATE TABLE velib_db.station_dynamique (
    number int,
    moment timestamp,
    bikes int,
    spaces int,
    PRIMARY KEY (number, moment)
-- Rows inside a partition are stored in ascending `moment` order.
) WITH CLUSTERING ORDER BY (moment ASC)
    AND bloom_filter_fp_chance = 0.01
    AND caching = {'keys': 'ALL', 'rows_per_partition': 'NONE'}
    AND comment = ''
    AND compaction = {'class': 'org.apache.cassandra.db.compaction.SizeTieredCompactionStrategy', 'max_threshold': '32', 'min_threshold': '4'}
    AND compression = {'chunk_length_in_kb': '64', 'class': 'org.apache.cassandra.io.compress.LZ4Compressor'}
    AND crc_check_chance = 1.0
    AND dclocal_read_repair_chance = 0.1
    -- A default TTL of 0 means rows do not expire on their own.
    AND default_time_to_live = 0
    AND gc_grace_seconds = 864000
    AND max_index_interval = 2048
    AND memtable_flush_period_in_ms = 0
    AND min_index_interval = 128
    AND read_repair_chance = 0.0
    AND speculative_retry = '99PERCENTILE';
```

In [18]:
from cassandra.cluster import Cluster

# Keyspace and table (column family) names reused by the following cells.
keyspace, family = "velib_db", "station_dynamique"

# Open a session bound to the keyspace, using the listed nodes as contact points.
cluster = Cluster(
    contact_points=[
        "84.39.48.220",
        "84.39.45.143",
        "84.39.45.149",
        "84.39.48.102",
    ]
)
session = cluster.connect(keyspace)

# Add static columns to the stations table

In [19]:
from cassandra.query import dict_factory
from cassandra.query import tuple_factory


# CQL template that adds a single static column to a table.
add_static_column_query = "ALTER TABLE {table} ADD {column} {type} STATIC"

# Static columns to create and, position by position, their CQL types.
# `columns` is reused later to select the matching columns of the stations CSV.
columns = ["name","address","longitude","latitude","banking","bonus","status","bike_stands","isolation_factor","owm_city_id"]
types = ["text","text","double","double","boolean","boolean","text","int","int","bigint"]


# Best-effort schema migration: try to add every column and keep going when a
# statement is rejected, so the cell can be re-run on an already migrated table.
for col_name, col_type in zip(columns, types):
    try:
        session.execute(add_static_column_query.format(table=family, column=col_name, type=col_type))
    except Exception as exc:
        # Catch Exception rather than everything so Ctrl-C / SystemExit still
        # interrupt the loop, and show the driver's error so that a failure
        # other than "column already exists" is not silently mistaken for it.
        print("{} is probably already defined ({})".format(col_name, exc))


# Structure de la table avec les colonnes statiques
```CQL
-- Layout of the stations table once the static columns have been added.
-- Columns marked `static` hold a single value per partition (i.e. per `number`),
-- shared by all the `moment` rows of that partition; `bikes` and `spaces`
-- remain regular per-row columns.
CREATE TABLE velib_db.station_dynamique (
    number int,
    moment timestamp,
    address text static,
    banking boolean static,
    bike_stands int static,
    bikes int,
    bonus boolean static,
    isolation_factor int static,
    latitude double static,
    longitude double static,
    name text static,
    owm_city_id bigint static,
    spaces int,
    status text static,
    -- `number` is the partition key, `moment` the clustering column.
    PRIMARY KEY (number, moment)
-- Rows inside a partition are stored in ascending `moment` order.
) WITH CLUSTERING ORDER BY (moment ASC)
    AND bloom_filter_fp_chance = 0.01
    AND caching = {'keys': 'ALL', 'rows_per_partition': 'NONE'}
    AND comment = ''
    AND compaction = {'class': 'org.apache.cassandra.db.compaction.SizeTieredCompactionStrategy', 'max_threshold': '32', 'min_threshold': '4'}
    AND compression = {'chunk_length_in_kb': '64', 'class': 'org.apache.cassandra.io.compress.LZ4Compressor'}
    AND crc_check_chance = 1.0
    AND dclocal_read_repair_chance = 0.1
    AND default_time_to_live = 0
    AND gc_grace_seconds = 864000
    AND max_index_interval = 2048
    AND memtable_flush_period_in_ms = 0
    AND min_index_interval = 128
    AND read_repair_chance = 0.0
    AND speculative_retry = '99PERCENTILE';
```

### Import static data from the previously exported stations CSV file (`data/static/stations.csv`)

In [20]:
import pandas as pd

# Load the exported stations file; its fields are separated by semicolons.
# Options that were tried and are currently disabled:
#   dtype={"bike_stands":np.int32,"isolation_factor":np.int32,"owm_city_id":np.int32}
#   converters={"isolation_factor":np.int32}
#   na_filter=False
stations = pd.read_csv(filepath_or_buffer="data/static/stations.csv", sep=";")


# Names of the columns that are meant to be cast to integers.
cols_to_int = ["bike_stands", "isolation_factor", "owm_city_id"]


def convert_to_int(col, cols):
    """Return ``col`` cast to ``int`` when its name is listed in ``cols``.

    Parameters
    ----------
    col : object exposing ``name`` and ``astype`` (e.g. a pandas Series)
        The column to inspect.
    cols : container of str
        Names of the columns that must be converted.

    Returns
    -------
    The result of ``col.astype(int)`` if ``col.name`` is in ``cols``,
    otherwise ``col`` itself, untouched.
    """
    return col.astype(int) if col.name in cols else col

# Keep only the station number followed by the static columns, in that order.
# (Disabled follow-up step: .apply(convert_to_int,args=[cols_to_int]))
stations = stations.loc[:, ["number", *columns]]


# CQL template updating the columns of one station, addressed by its number.
update_stations_cql = "UPDATE {table} SET {values} WHERE number = {station}"
    

def update_reqs(row,batch):
    """Add to ``batch`` the CQL UPDATE that writes one station's columns.

    Parameters
    ----------
    row : pandas.Series
        One row of the stations frame. Its first entry is skipped when building
        the SET clause; the station is identified through ``row["number"]``.
        Every other entry becomes a ``column=value`` assignment, the index
        label being used as the column name.
    batch : object exposing ``add(statement)`` (e.g. a BatchStatement)
        Receives the generated statement; it is modified in place.

    Returns
    -------
    None

    Notes
    -----
    * String values are single-quoted, with embedded quotes doubled as CQL
      requires; every other value is written with its default formatting.
    * Missing values (NaN / None) are left out of the SET clause instead of
      being rendered as the bare token ``nan``, which is not a valid literal
      for text, int or boolean columns and would make the statement fail.
    * When no assignment is left, nothing is added to the batch, since an
      UPDATE with an empty SET clause is not valid CQL.
    """
    assignments = []
    for key, value in zip(row.index[1:], row.values[1:]):
        if isinstance(value, str):
            # Escape every ' in the CQL literal with ''.
            value = "'" + value.replace("'", "''") + "'"
        elif pd.isna(value):
            # Nothing to write for this column.
            continue
        assignments.append("{key}={value}".format(key=key, value=value))

    if not assignments:
        return

    batch.add(
        update_stations_cql.format(
            table=family,
            values=",".join(assignments),
            station=int(row["number"]),
        )
    )

#reqs = stations.apply(update_reqs,axis=1,args=[batch] )
#reqs[1]

#filter on n/a values in a dataframe
#stations[stations.isnull().any(axis=1)]


### Build the batch statement

In [21]:
from cassandra.query import BatchStatement
from cassandra import ConsistencyLevel

# Single batch gathering all the station updates, written at consistency level ANY.
# NOTE(review): this batch receives statements for every row of `stations` and so
# targets many partitions at once -- confirm it stays within the cluster's batch
# size thresholds.
batch = BatchStatement(consistency_level=ConsistencyLevel.ANY)

# update_reqs is invoked once per row with the shared batch, which it fills in place.
stations.apply(update_reqs, axis="columns", args=(batch,))

# Last expression of the cell: displays the batch summary.
batch


<BatchStatement type=LOGGED, statements=1225, consistency=ANY>

### Execute the batch statement

In [22]:
# Send the batch built above to the cluster through the open session.
session.execute(batch)

<cassandra.cluster.ResultSet at 0x273aa23ba90>