In [1]:
# Create a connection

from cassandra.cluster import Cluster

# Connect to the node at 127.0.0.1 and open a session that the rest of the
# notebook uses for every statement.
try:
    cluster = Cluster(['127.0.0.1']) #If you have a locally installed Apache Cassandra instance
    session = cluster.connect()
except Exception as e:
    # Without a session nothing below can run; show the driver error and stop
    # here instead of letting later cells fail with an unrelated NameError.
    print(e)
    raise

In [2]:
# Create the killrvideo keyspace if it does not exist yet
# (SimpleStrategy with a replication factor of 1).
create_keyspace_cql = (
    "\n"
    "    CREATE KEYSPACE IF NOT EXISTS killrvideo\n"
    "    WITH REPLICATION = \n"
    "    { 'class' : 'SimpleStrategy', 'replication_factor' : 1 }"
)

try:
    session.execute(create_keyspace_cql)
except Exception as err:
    # Report the failure and let the notebook continue.
    print(err)

In [3]:
# Point the session at the killrvideo keyspace; the statements below refer to
# videos_by_tag without a keyspace prefix.
keyspace_name = 'killrvideo'
try:
    session.set_keyspace(keyspace_name)
except Exception as err:
    print(err)

In [26]:
# Table layout:
#   partition key      = tag
#   clustering columns = added_date (stored newest first), then video_id
query = """
CREATE TABLE IF NOT EXISTS videos_by_tag(
    tag text,
    video_id text,
    added_date timestamp,
    title text,
    PRIMARY KEY ((tag), added_date, video_id)
)
WITH CLUSTERING ORDER BY (added_date DESC);
"""

try:
    session.execute(query)
except Exception as err:
    # Print the driver error and carry on.
    print(err)

In [27]:
import csv

# Location of the CSV to load. The first row is skipped as a header; the next
# four fields of every row are inserted as tag, video_id, added_date, title.
# NOTE(review): absolute, machine-specific path - adjust for your environment.
file = r'C:\Users\abdul\cassandra-fundamentals\foundations-of-apache-cassandra\videos-by-tag.csv'

# The statement text is identical for every row, so build it once.
query = (
    "INSERT INTO videos_by_tag(tag, video_id, added_date, title)"
    "VALUES(%s, %s, %s, %s)"
)

# The csv module expects files opened with newline='' so that newlines inside
# quoted fields are handled by the reader rather than the file object.
with open(file, newline='') as f:
    reader = csv.reader(f)
    next(reader, None)  # skip the header row; do not fail on an empty file
    for line in reader:
        if not line:
            # csv.reader yields an empty list for a blank line; nothing to insert.
            continue
        session.execute(query, (line[0], line[1], line[2], line[3]))

## Let's extract 3 columns from videos_by_tag (tag, video_id, added_date)

In [28]:
# Read three columns from every row of the table (no WHERE clause, so all
# partitions are returned) and print them one row per line.
query = """
    SELECT tag, video_id, added_date
    FROM videos_by_tag
"""
rows = session.execute(query)
for video in rows:
    print(video.tag, video.video_id, video.added_date)

datastax 4845ed97-14bd-11e5-8a40-8338255b7e33 2013-10-15 23:00:00
datastax 5645f8bd-14bd-11e5-af1a-8638355b8e3a 2013-04-15 23:00:00
cassandra 1645ea59-14bd-11e5-a993-8138354b7e31 2014-01-28 23:00:00
cassandra 3452f7de-14bd-11e5-855e-8738355b7e3a 2013-03-16 23:00:00
cassandra 245e8024-14bd-11e5-9743-8238356b7e32 2012-04-02 23:00:00


## Execute your query again, but list the oldest videos first

To combine the `IN` keyword on the partition key with `ORDER BY`, paging must be turned off
- This is done by setting `fetch_size=0` in the `SimpleStatement`

In [41]:
from cassandra.query import SimpleStatement

In [45]:
#SimpleStatement?

In [55]:
# Select both partitions and sort by added_date (ascending, i.e. oldest first).
# fetch_size=0 switches paging off for this statement.
query = """
    SELECT tag, video_id, added_date
    FROM videos_by_tag
    WHERE tag IN ('datastax', 'cassandra')
    ORDER BY added_date;
"""
statement = SimpleStatement(query, fetch_size=0)
ordered_rows = session.execute(statement)
for video in ordered_rows:
    print(video.tag, video.video_id, video.added_date)

cassandra 245e8024-14bd-11e5-9743-8238356b7e32 2012-04-02 23:00:00
cassandra 3452f7de-14bd-11e5-855e-8738355b7e3a 2013-03-16 23:00:00
datastax 5645f8bd-14bd-11e5-af1a-8638355b8e3a 2013-04-15 23:00:00
datastax 4845ed97-14bd-11e5-8a40-8338255b7e33 2013-10-15 23:00:00
cassandra 1645ea59-14bd-11e5-a993-8138354b7e31 2014-01-28 23:00:00


## Change your query to restrict the partition key value to 'cassandra'

In [56]:
# Restrict the read to the single partition whose tag is 'cassandra'.
query = """
    SELECT tag, video_id, added_date
    FROM videos_by_tag
    WHERE tag = 'cassandra'
"""
rows = session.execute(query)
for video in rows:
    print(video.tag, video.video_id, video.added_date)

cassandra 1645ea59-14bd-11e5-a993-8138354b7e31 2014-01-28 23:00:00
cassandra 3452f7de-14bd-11e5-855e-8738355b7e3a 2013-03-16 23:00:00
cassandra 245e8024-14bd-11e5-9743-8238356b7e32 2012-04-02 23:00:00


## Change your query to retrieve videos made in 2013 or later.

In [59]:
# Filter on the added_date clustering column without naming a partition key.
# NOTE(review): ALLOW FILTERING lets this run across all partitions; on a large
# table prefer adding a `tag = ...` restriction - confirm the intended scope.
query = """
    SELECT tag, video_id, added_date
    FROM videos_by_tag
    WHERE added_date >= '2013-01-01'
    ALLOW FILTERING
"""
rows = session.execute(query)
for video in rows:
    print(video.tag, video.video_id, video.added_date)

datastax 4845ed97-14bd-11e5-8a40-8338255b7e33 2013-10-15 23:00:00
datastax 5645f8bd-14bd-11e5-af1a-8638355b8e3a 2013-04-15 23:00:00
cassandra 1645ea59-14bd-11e5-a993-8138354b7e31 2014-01-28 23:00:00
cassandra 3452f7de-14bd-11e5-855e-8738355b7e3a 2013-03-16 23:00:00


- DROP THE TABLE

In [60]:
# Clean up: remove the exercise table if it is present.
query = "DROP TABLE IF EXISTS videos_by_tag"

try:
    session.execute(query)
except Exception as err:
    # Print the driver error and carry on.
    print(err)

In [61]:
# Close the session first, then the cluster it was created from.
for handle in (session, cluster):
    handle.shutdown()