# Read raw landed data in an ad hoc manner
This notebook shows how to view and interpret raw landed data interactively. This would generally only be used to verify that data has landed and is valid. Once that is done, a scheduled Spark job should be set up to move the data into a shape and format suitable for access (a wide table using Delta as the format is suitable for most use cases).

### Establish a Spark interactive context

In [None]:
from neuro_python.neuro_compute import spark_manager as spm

In [None]:
# Ask spark_manager for the workspaces; the workspace_id passed to the
# following cells presumably comes from this output — verify.
spm.list_workspaces()

In [None]:
# List the clusters of the given workspace; the cluster_id passed to the
# following cells presumably comes from this output — verify.
spm.list_clusters(workspace_id='e93714e3-1506-40a6-941d-50b808cf935c')

In [None]:
# Start the cluster that the interactive context below is created on.
spm.start_cluster(
    cluster_id='3ca51b3c-fdd2-4700-ae0d-79548cca85e8',
    workspace_id='e93714e3-1506-40a6-941d-50b808cf935c',
)

In [None]:
# Create the interactive context named "test1" on the same cluster/workspace.
spm.create_context(
    "test1",
    cluster_id='3ca51b3c-fdd2-4700-ae0d-79548cca85e8',
    workspace_id='e93714e3-1506-40a6-941d-50b808cf935c',
)

### Ensure libraries are installed on cluster

In [None]:
# Show the libraries on the cluster so you can check whether
# 'neuro-python-clients' still needs installing.
spm.list_libraries(
    cluster_id='3ca51b3c-fdd2-4700-ae0d-79548cca85e8',
    workspace_id='e93714e3-1506-40a6-941d-50b808cf935c',
)

In [None]:
# Install the package that provides neuro_python_clients.pyspark (imported in
# the chunk reader cell) from the NeuroHelpers package feed.
spm.install_library(
    'neuro-python-clients',
    '0.0.15',
    library_uri='https://pkgs.dev.azure.com/DownerD3S/b192675d-16a5-456b-8f8b-7fc483740331/_packaging/NeuroHelpers/pypi/simple/',
    cluster_id='3ca51b3c-fdd2-4700-ae0d-79548cca85e8',
    workspace_id='e93714e3-1506-40a6-941d-50b808cf935c',
)

### Import and initialise the chunk reader module

In [None]:
%%spark
# Import the chunked reader under the alias `cr` used by the later cells.
from neuro_python_clients.pyspark import chunked_reader as cr
# Hand the module the `spark` session object; presumably this must run before
# any other `cr` call — TODO confirm against the library docs.
cr.init(spark)

### Import the raw data table

In [None]:
%%spark_import_table
import_table('df1','LeeTestGen2','NvEventHubHub1RawData')

### View raw data in the table

In [None]:
%%spark_sql
select *
from df1
where Day=8 and
Month=4 and
Year=2020 and
PartitionId=0
limit 30

### Combine and reshape the raw data

In [None]:
%%spark
# Describe the partition to read. The values mirror the SQL filter in the
# "View raw data" cell (PartitionId=0, Year=2020, Month=4, Day=8), so the
# argument order is presumably (partition id, year, month, day) — verify.
part_dets=cr.PartitionDetails(0,2020,4,8)
# Combine the chunked messages of that partition from the raw table df1 into df2.
df2=cr.combine_chunked_messages_v1_4(df1,part_dets)

In [None]:
%spark_pandas -df df2

### Interpret binary reference data

In [None]:
%%spark
import json
import pickle
from pyspark.sql import functions as psf
def pickle2json(bytedata):
    """Convert a pickled payload into its JSON text representation.

    Args:
        bytedata: A bytes-like object holding a pickled Python object, or
            ``None`` (e.g. a null value in the source column).

    Returns:
        The JSON string for the unpickled object, or ``None`` when
        ``bytedata`` is ``None``.

    Raises:
        pickle.UnpicklingError: If ``bytedata`` is not a valid pickle
            (other exceptions are possible for corrupt payloads).
        TypeError: If the unpickled object is not JSON serialisable.
    """
    # Null values would otherwise raise TypeError in pickle.loads and fail
    # the whole job when this function is used as a UDF; pass them through.
    if bytedata is None:
        return None
    # SECURITY: pickle.loads can execute arbitrary code embedded in the
    # payload. Only use this on data from producers you trust.
    return json.dumps(pickle.loads(bytedata))
# Wrap pickle2json as a Spark UDF so it can be applied to a DataFrame column
# (it is used on df2.binaryData in the next cell).
udf_pickle2json = psf.udf(pickle2json)

In [None]:
%%spark
# Keep only the rows of interface 'testInterface2' and decode their pickled
# binaryData column into a single JSON string column named 'json'.
df3 = (
    df2
    .filter(df2.interfaceName == psf.lit('testInterface2'))
    .select(udf_pickle2json(df2.binaryData).alias('json'))
)

In [None]:
%spark_pandas -df df3