In [1]:
import csv
import json
import requests
import sys
import oasis


from pyArango.connection import *
from pyArango.collection import Collection, Edges, Field
from pyArango.graph import Graph, EdgeDefinition
from pyArango.collection import BulkOperation as BulkOperation

In [3]:
print("User Data")
!head -n 3 data/users.csv 
print()
print("Movies Data")
!head -n 3 data/movies.csv 
print()
print("Rating Data")
!head -n 3 data/ratings.csv 

User Data
user_id,Age,Gender,occupation,zip_code
1,35,M,engineer,94117
2,53,F,other,94043

Movies Data
movie_id, movie title , release date , video release date , IMDb URL , unknown , Action , Adventure , Animation , Children's , Comedy , Crime , Documentary , Drama , Fantasy , Film-Noir , Horror , Musical , Mystery , Romance , Sci-Fi , Thriller , War , Western 
1,Toy Story (1995),01-Jan-1995,,http://us.imdb.com/M/title-exact?Toy%20Story%20(1995),0,0,0,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0
2,GoldenEye (1995),01-Jan-1995,,http://us.imdb.com/M/title-exact?GoldenEye%20(1995),0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0

Rating Data
user_id,item_id,Rating,Timestamp
186,302,3,891717742
22,377,1,878887116


In [7]:
# Retrieve tmp credentials from ArangoDB Tutorial Service.
# `login` is indexed with "dbName" below, so it must be a mapping that carries
# at least the name of the temporary database.
login = oasis.getTempCredentials()

# Connect to the temp database.
# `conn` is indexed by database name to obtain `db`; `db` is the notebook-level
# handle the later cells use for createCollection / createGraph and for
# looking up collections by name.
conn = oasis.connect_c(login)
db = conn[login["dbName"]] 

Reusing cached credentials.


In [8]:
from pyArango.collection import Collection, Field
from pyArango.graph import Graph, EdgeDefinition


class Users(Collection):
    """pyArango document collection with one document per user.

    Declares three fields without validators: ``user_id``, ``age`` and
    ``gender``.

    NOTE(review): the import cell in this notebook stores the CSV ``user_id``
    in ``_key`` and never assigns the ``user_id`` field itself -- confirm
    whether the declaration is still wanted.
    """

    _fields = dict(
        user_id=Field(),
        age=Field(),
        gender=Field(),
    )
    
class Movies(Collection):
    """pyArango document collection with one document per movie.

    Declares three fields without validators: ``movie_id``, ``movie_title``
    and ``release_date``.

    The third field was previously spelled ``release_data``; the import cell
    writes ``doc["release_date"]``, so the declaration now uses the same
    spelling and the schema matches the attribute that is actually stored.

    NOTE(review): the import cell stores the CSV ``movie_id`` in ``_key`` and
    never assigns the ``movie_id`` field itself -- confirm whether the
    declaration is still wanted.
    """

    _fields = {
        "movie_id": Field(),
        "movie_title": Field(),
        "release_date": Field()
    }

class Ratings(Edges):
    """pyArango edge collection linking a user to a movie they rated.

    The rating's user and movie are not stored as fields; they are carried by
    the edge's ``_from`` and ``_to`` attributes.

    The score field is declared as ``ratings`` (plural) because that is the
    key the import cell writes (``doc["ratings"]``). It was previously
    declared as ``rating``, which never matched the stored attribute. The
    declaration is aligned to the data rather than the other way round so
    that the stored edge documents, and anything that reads ``ratings`` from
    them, stay unchanged.
    """

    _fields = {
        # user_id and item_id are encoded by _from, _to
        "ratings": Field(),
        "timestamp": Field()
    }

class IMDBGraph(Graph):
    """Graph definition with a single edge type: Users --Ratings--> Movies."""

    # One edge definition: "Ratings" edges start in "Users" and end in "Movies".
    _edgeDefinitions = [
        EdgeDefinition(
            "Ratings",
            fromCollections=["Users"],
            toCollections=["Movies"],
        ),
    ]
    # No collections take part in the graph without an edge definition.
    _orphanedCollections = []

# Create the two document collections and the edge collection by class name,
# then the graph that ties them together. The graph handle is kept in
# `iMDBGraph` at notebook level.
for collection_name in ("Users", "Movies", "Ratings"):
    db.createCollection(collection_name)
iMDBGraph = db.createGraph("IMDBGraph")

print("Collection/Graph Setup done.")

Collection/Graph Setup done.


In [9]:
# Import users: one document per CSV row, keyed by the CSV user_id.
# Documents are created and saved inside a BulkOperation context with
# batchSize=100.
collection = db["Users"]
with BulkOperation(collection, batchSize=100) as col:
    with open('data/users.csv', newline='') as csvfile:
        reader = csv.reader(csvfile, delimiter=',', quotechar='|')
        # Skip header
        next(reader)
        for row in reader:
            # Strict five-way unpack: a row with any other column count raises
            # ValueError. The two columns that are not stored get underscore
            # names; in particular the last one must not be called `zip`,
            # which would shadow the builtin for every later cell.
            user_id, age, gender, _occupation, _zip_code = row
            doc = col.createDocument()
            doc["_key"] = user_id
            # csv.reader yields strings, so age and gender are stored as text.
            doc["age"] = age
            doc["gender"] = gender
            doc.save()

# Import movies: one document per CSV row, keyed by the CSV movie_id.
# Only the first three columns (id, title, release date) are stored; the
# remaining columns (video release date, URL and the genre flags) are ignored.
MOVIE_COLUMN_COUNT = 24  # id, title, 2 dates, URL, and 19 genre flag columns

collection = db["Movies"]
with BulkOperation(collection, batchSize=100) as col:
    with open('data/movies.csv', newline='') as csvfile:
        reader = csv.reader(csvfile, delimiter=',', quotechar='|')
        # Skip header
        next(reader)
        for row in reader:
            # Keep the strictness of a full tuple unpack (a malformed row is an
            # error, not silently mis-sliced data) without binding 21 unused
            # column names in the notebook namespace.
            if len(row) != MOVIE_COLUMN_COUNT:
                raise ValueError(
                    f"data/movies.csv: expected {MOVIE_COLUMN_COUNT} columns, "
                    f"got {len(row)}: {row!r}"
                )
            movie_id, movie_title, release_date = row[:3]
            doc = col.createDocument()
            doc["_key"] = movie_id
            doc["movie_title"] = movie_title
            doc["release_date"] = release_date
            doc.save()

# Import ratings: one edge per CSV row, pointing from the rating user's
# document to the rated movie's document. Edges are created and saved inside
# a BulkOperation context with batchSize=1000.
collection = db["Ratings"]
with BulkOperation(collection, batchSize=1000) as col, \
        open('data/ratings.csv', newline='') as csvfile:
    reader = csv.reader(csvfile, quotechar='|')
    next(reader)  # drop the header row
    # Unpacking in the loop header is strict: a row that does not have exactly
    # four columns raises ValueError.
    for user_id, movie_id, rating, timestamp in reader:
        doc = col.createDocument()
        # Edge endpoints are "<collection>/<key>" document handles.
        doc["_from"] = f"Users/{user_id}"
        doc["_to"] = f"Movies/{movie_id}"
        doc["ratings"] = rating
        doc["timestamp"] = timestamp
        doc.save()

print("Import Done")

Import Done
