In [None]:
import os
import json
from datetime import (date, datetime, time)

In [None]:
os.chdir('./Data')

In [51]:


stringyfiedArchitecturesData = open('architecture_models.json', encoding="utf8")
architecturesData = json.load(stringyfiedArchitecturesData)

stringyfiedCharactersData = open('characters-creatures_models.json', encoding="utf8")
charactersData = json.load(stringyfiedCharactersData)

stringyfiedHeritageData = open('cultural-heritage-history_models.json',encoding="utf8")
heritageData = json.load(stringyfiedHeritageData)

#Example of a 3D object data descriptor
print(heritageData[0]['publishedAt'])

2016-08-23T16:51:09.378793


In [None]:
import numpy as np
import scipy as sc
import pandas as pd
import seaborn as sns
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
import plotly.graph_objs as go
import plotly.express as px
import matplotlib.pyplot as plt
init_notebook_mode(connected=True)

In [59]:
usedCategories = {
    "undefined": 0,
    "characters-creatures": 1,
    "architecture": 2,
    "cultural-heritage-history": 3
}

def loadToNDimArray(modelsData, categoryCode):
  #uid = []
  likeCount = []
  commentCount = []
  viewCount = []
  faceCount = []
  vertexCount = []
  category = []
  lifetime = []

  for model in modelsData:
    #uid.append(model["uid"])
    likeCount.append(int(model["likeCount"]))
    commentCount.append(int(model["commentCount"]))
    viewCount.append(int(model["viewCount"]))
    faceCount.append(int(model["faceCount"]))
    vertexCount.append(int(model["vertexCount"]))

    try:
      deltaDays = datetime(2020, 2, 25) - datetime.strptime(
      model["publishedAt"], '%Y-%m-%dT%H:%M:%S.%f')
      lifetime.append(deltaDays.days)
    except:
      deltaDays = datetime(2020, 2, 25) - datetime.strptime(
      model["publishedAt"], '%Y-%m-%dT%H:%M:%S')
      lifetime.append(deltaDays.days)

    category.append(int(categoryCode))

  #uid = np.array(uid) 
  likeCount = np.array(likeCount)
  commentCount = np.array(commentCount)
  viewCount = np.array(viewCount)
  faceCount = np.array(faceCount)
  vertexCount = np.array(vertexCount)
  category = np.array(category)

  return np.stack(( likeCount, commentCount, viewCount, faceCount, vertexCount,  lifetime, category), axis = 0).transpose((1,0))


In [60]:
# Processing information in numpy matrix
charactersDataTable = loadToNDimArray(charactersData, 1)
architecturesDataTable = loadToNDimArray(architecturesData, 2)
heritageDataTable = loadToNDimArray(heritageData, 3)

In [56]:
dataTable = np.append(architecturesDataTable, charactersDataTable, axis=0)
dataTable = np.append(dataTable,  heritageDataTable, axis=0)

In [57]:
globalDf =  pd.DataFrame(
    data=dataTable,
    columns= ["Likes", "Comments", "Views", "FaceCount", "VertexCount", "Category"] 
)
globalDf["Category"] = globalDf['Category'].apply(np.int64)

charactersDf =  pd.DataFrame(
    data=charactersDataTable,
    columns= ["Likes", "Comments", "Views", "FaceCount", "VertexCount", "Category"] 
)
charactersDf["Category"] = charactersDf['Category'].apply(np.int64)

architecturesDf =  pd.DataFrame(
    data=architecturesDataTable,
    columns= ["Likes", "Comments", "Views", "FaceCount", "VertexCount", "Category"] 
)
architecturesDf["Category"] = architecturesDf['Category'].apply(np.int64)

heritageDf =  pd.DataFrame(
    data=heritageDataTable,
    columns= ["Likes", "Comments", "Views", "FaceCount", "VertexCount", "Category"] 
)
heritageDf["Category"] = heritageDf["Category"].apply(np.int64)


In [None]:
#Dataframe tools
def appendCalculatedColumns(df):
    df["LikeViewRateo"] = df["Likes"]/df["Views"]
    return df

In [None]:
heritageDf = appendCalculatedColumns(heritageDf)
globalDf = appendCalculatedColumns(globalDf)
charactersDf = appendCalculatedColumns(charactersDf)
architecturesDf = appendCalculatedColumns(architecturesDf)

In [None]:
#Data are ready to be processed
px.scatter(
    charactersDf,
    x='Views',
    y='Likes',
)

In [None]:
#Data are ready to be processed
px.scatter(
    architecturesDf,
    x='Views',
    y='Likes',
)

In [None]:
px.scatter(
    heritageDf,
    x='Views',
    y='Likes',
)

In [None]:
print(json.dumps(usedCategories,indent=2))
px.scatter(globalDf,
    x="Views",
    y="Likes",
    color="Category",   
)

In [58]:
charactersDf.describe()

Unnamed: 0,Likes,Comments,Views,FaceCount,VertexCount,Category
count,1301.0,1301.0,1301.0,1301.0,1301.0,1301.0
mean,370.511914,13.614143,20846.899308,177961.7,106434.1,1.0
std,429.168003,16.32884,33792.651004,386714.1,228251.6,0.0
min,0.0,0.0,7633.0,10.0,246.0,1.0
25%,114.0,4.0,9444.0,11836.0,7667.0,1.0
50%,240.0,9.0,12785.0,34630.0,22626.0,1.0
75%,470.0,18.0,20679.0,150293.0,95292.0,1.0
max,3871.0,175.0,578349.0,4866654.0,2687187.0,1.0


In [None]:
globalDf[globalDf.Views <= 200000].describe()

In [None]:
globalDf[globalDf.Views <= 50000].describe()

In [None]:
architecturesDf.describe()

In [None]:
heritageDf.describe()

In [None]:
globalDf.describe()