# If required, use the following code snippets to work around the "Insert to Code" bug in DSX Local 1.1.2
### There are 3 code cells below:
* The first two are code snippets for the two datasets used in the Python notebook
* The third is the code snippet for the dataset used by the Scala notebook

In [None]:
# For Python visualization notebook: Automatically generated code for inserting Pandas dataframe for "churn_rate_visualization" dataset
#
# Loads the remote data set into the Pandas dataframe `df3`. Three kinds of
# source are handled:
#   * type "HDFS" with an empty URL     -> read over the hdfs:// protocol through Spark
#   * type "HDFS" with a non-empty URL  -> download over HTTP (?op=OPEN) with requests
#   * any other type                    -> treated as a JDBC connection via jaydebeapi
import dsx_core_utils
import requests
import jaydebeapi
from pyspark.sql import SparkSession
import os
import pandas as pd

# Add asset from remote connection
df3 = None
dataSet = dsx_core_utils.get_remote_data_set_info('churn_rate_visualization')
dataSource = dsx_core_utils.get_data_source_info(dataSet['datasource'])

# Differentiate between hdfs and jdbc
if dataSource['type'] == "HDFS":
    if dataSource['URL'] == "":
        # Use case for RPC port with hdfs protocol
        url = 'hdfs://' + dataSource['host'] + ':' + str(dataSource['port'])
        path = dataSet['file']
        file_fullpath = url + path
        # NOTE(review): `sc` is not defined in this cell; presumably it is the
        # SparkContext supplied by the notebook kernel -- confirm.
        sparkSession = SparkSession(sc).builder.getOrCreate()
        df3 = sparkSession.read.csv(file_fullpath, header="true", inferSchema="true")
        df3 = df3.toPandas()
    else:
        # Use case for HTTP Port with a webhdfs URL
        # Drop a single trailing slash so the file path is not joined with "//".
        if dataSource['URL'].endswith('/'):
            url = dataSource['URL'][:-1] + dataSet['file'] + "?op=OPEN"
        else:
            url = dataSource['URL'] + dataSet['file'] + "?op=OPEN"
        headers = {"Authorization": os.environ.get('DSX_TOKEN')}
        # NOTE(review): verify=False turns off TLS certificate verification
        # while the DSX token is sent in the Authorization header. Left as
        # generated; supply a CA bundle instead if the endpoint has a
        # verifiable certificate.
        response = requests.request("GET", url, headers=headers, timeout=10, verify=False, allow_redirects=True)
        if response.status_code != 200:
            raise Exception("get_data_source_info: " + str(response.status_code) + " returned when sending a request to \"" + url+"\"")

        # Only csv and txt payloads are accepted; the suffix picks the name of
        # the local copy.
        if dataSet['file'].endswith('csv'):
            ext = ".csv"
        elif dataSet['file'].endswith('txt'):
            ext = ".txt"
        else:
            raise Exception("Invalid file type that is not txt or csv")

        # The context manager closes the file even if the write fails.
        with open("output" + ext, "w+") as csvFile:
            csvFile.write(response.text)
        df3 = pd.read_csv("output" + ext)
        # Files output.csv/output.txt/output.pkl will persist
        df3.to_pickle('output.pkl')
        df3 = pd.read_pickle('output.pkl')
else:
    conn = jaydebeapi.connect(dataSource['driver_class'], [dataSource['URL'], dataSource['user'], dataSource['password']])
    try:
        # Qualify the table with its schema only when a schema is set.
        if len(dataSet['schema'].strip()) != 0:
            df3 = pd.read_sql('select * from ' + dataSet['schema'] + '.' + dataSet['table'], con=conn)
        else:
            df3 = pd.read_sql('select * from ' + dataSet['table'], con=conn)
    finally:
        # read_sql has already materialised the rows, so the connection can
        # be released whether or not the query succeeded.
        conn.close()

# Last expression of the cell: shows the first rows of the dataframe.
df3.head()


In [None]:
# For Python visualization notebook:  Automatically generated code for inserting Pandas dataframe for "cust_summary_visualization" dataset
#
# Loads the remote data set into the Pandas dataframe `df4`. Three kinds of
# source are handled:
#   * type "HDFS" with an empty URL     -> read over the hdfs:// protocol through Spark
#   * type "HDFS" with a non-empty URL  -> download over HTTP (?op=OPEN) with requests
#   * any other type                    -> treated as a JDBC connection via jaydebeapi

# Imports are repeated here so this cell also runs when it is pasted into a
# notebook on its own; re-importing an already loaded module is harmless.
import dsx_core_utils
import requests
import jaydebeapi
from pyspark.sql import SparkSession
import os
import pandas as pd

df4 = None
dataSet = dsx_core_utils.get_remote_data_set_info('cust_summary_visualization')
dataSource = dsx_core_utils.get_data_source_info(dataSet['datasource'])

# Differentiate between hdfs and jdbc
if dataSource['type'] == "HDFS":
    if dataSource['URL'] == "":
        # Use case for RPC port with hdfs protocol
        url = 'hdfs://' + dataSource['host'] + ':' + str(dataSource['port'])
        path = dataSet['file']
        file_fullpath = url + path
        # NOTE(review): `sc` is not defined in this cell; presumably it is the
        # SparkContext supplied by the notebook kernel -- confirm.
        sparkSession = SparkSession(sc).builder.getOrCreate()
        df4 = sparkSession.read.csv(file_fullpath, header="true", inferSchema="true")
        df4 = df4.toPandas()
    else:
        # Use case for HTTP Port with a webhdfs URL
        # Drop a single trailing slash so the file path is not joined with "//".
        if dataSource['URL'].endswith('/'):
            url = dataSource['URL'][:-1] + dataSet['file'] + "?op=OPEN"
        else:
            url = dataSource['URL'] + dataSet['file'] + "?op=OPEN"
        headers = {'Authorization': os.environ.get('DSX_TOKEN')}
        # NOTE(review): verify=False turns off TLS certificate verification
        # while the DSX token is sent in the Authorization header. Left as
        # generated; supply a CA bundle instead if the endpoint has a
        # verifiable certificate.
        response = requests.request("GET", url, headers=headers, timeout=10, verify=False, allow_redirects=True)
        if response.status_code != 200:
            raise Exception("get_data_source_info: " + str(response.status_code) + " returned when sending a request to \"" + url+"\"")

        # Only csv and txt payloads are accepted; the suffix picks the name of
        # the local copy.
        if dataSet['file'].endswith('csv'):
            ext = ".csv"
        elif dataSet['file'].endswith('txt'):
            ext = ".txt"
        else:
            raise Exception("Invalid file type that is not txt or csv")

        # The context manager closes the file even if the write fails.
        with open("output" + ext, "w+") as csvFile:
            csvFile.write(response.text)
        df4 = pd.read_csv("output" + ext)
        # Files output.csv/output.txt/output.pkl will persist
        df4.to_pickle('output.pkl')
        df4 = pd.read_pickle('output.pkl')
else:
    conn = jaydebeapi.connect(dataSource['driver_class'], [dataSource['URL'], dataSource['user'], dataSource['password']])
    try:
        # Qualify the table with its schema only when a schema is set.
        if len(dataSet['schema'].strip()) != 0:
            df4 = pd.read_sql('select * from ' + dataSet['schema'] + '.' + dataSet['table'], con=conn)
        else:
            df4 = pd.read_sql('select * from ' + dataSet['table'], con=conn)
    finally:
        # read_sql has already materialised the rows, so the connection can
        # be released whether or not the query succeeded.
        conn.close()

# Last expression of the cell: shows the first rows of the dataframe.
df4.head()



In [None]:
// For Scala model training and deployment notebook:  Automatically generated code for inserting Spark dataframe for "cust_summary_notebook_training" dataset
// (This header uses `//` because `#` does not start a comment in Scala.)
import com.ibm.analytics.dsxCoreUtils._
import scala.util.{Try, Success, Failure}
import java.io._
// Add asset from remote connection
val data = new DataUtil()
// NOTE(review): `sc` is not defined in this cell; presumably it is the
// SparkContext supplied by the notebook kernel -- confirm.
val retTryDf = data.getRemoteDataSet(sc, "cust_summary_notebook_training")
// NOTE(review): the name suggests a scala.util.Try; if so, `.get` rethrows the
// underlying exception when the load failed -- confirm against DataUtil.
val df2 = retTryDf.get
// Preview the first 5 rows of the loaded dataframe.
df2.show(5)