In [None]:
import os

import numpy as np
import pandas as pd
import pyodbc
import pyreadstat  ##conda install conda-forge::pyreadstat
import sqlalchemy as sa
from sqlalchemy.engine import URL

In [None]:
# Connection settings for the workshop SQL Server database.
# Each value can be overridden through an environment variable so that other
# credentials never have to be typed into (or committed with) the notebook.
# The fallbacks are the shared workshop values this cell originally hard-coded.
server = os.environ.get('GISVALTECH_DB_SERVER', 'gisvaltech2025.cucrxmmsbhhf.us-east-1.rds.amazonaws.com')
database = os.environ.get('GISVALTECH_DB_NAME', 'workshop')
username = os.environ.get('GISVALTECH_DB_USER', 'tester')
password = os.environ.get('GISVALTECH_DB_PASSWORD', 'gisvaltech')

# ODBC connection string used by both the pyodbc and the SQLAlchemy cells.
connection_string = "DRIVER={ODBC Driver 17 for SQL Server};SERVER=%s;DATABASE=%s;UID=%s;PWD=%s" % (server, database, username, password)

In [None]:
## Read SPSS sav file using read_spss

# Forward slashes resolve on Windows as well as on macOS/Linux; the previous
# backslash-separated path only worked on Windows.
data = pd.read_spss('../SPSS/Data/GISValTechSampleData.sav')

# Cast the numeric columns to plain integers in one pass.
data = data.astype({
    'ParcelId': int,
    'SalesPrice': int,
    'Sqft': int,
    'LandSize': int,
    'GarageSize': int,
    'EffAge': int,
})
data['SaleDate'] = pd.to_datetime(data['SaleDate'])

# Quality is an ordered categorical: sorting and comparisons follow the
# category order listed here.
data['Quality'] = pd.Categorical(
    data['Quality'],
    categories=['Poor', 'BelowAverage', 'Average', 'AboveAverage', 'Superior'],
    ordered=True,
)
# NBHD is stored as an unordered categorical of integer codes.
data['NBHD'] = pd.Categorical(data['NBHD'].astype(int))

# Index the frame by parcel; reassign instead of mutating in place.
data = data.set_index('ParcelId')
data.head()

In [None]:
# Export data to spss using pyreadstat
# A small two-column demo frame is built and written out as an SPSS .sav file.
pyreadstat_data = pd.DataFrame(
    data={
        "ParcelId": [12345, 12346, 12347, 12348],
        "LivingArea": [1500, 1800, 2000, 2250],
    }
)

pyreadstat.write_sav(pyreadstat_data, '..//exported data//spss.sav')

In [None]:
## Extract data from SQL database using pyodbc
## without a warning

# Column names, in the same order as the SELECT list below.
sample_columns = ["ParcelId", "SaleDate", "SalesPrice", "Sqft", "LandSize", "Bathrooms", "Quality",
                  "GarageSize", "EffAge", "NBHD"]

cnxn = pyodbc.connect(connection_string)
try:
    cursor = cnxn.cursor()
    cursor.execute("""SELECT [ParcelId], [SaleDate], [SalesPrice], [Sqft], [LandSize], [Bathrooms], [Quality],
      [GarageSize], [EffAge], [NBHD] FROM SampleData""")
    # Turn every pyodbc Row into a plain tuple so pandas reads it as one record.
    records = [tuple(row) for row in cursor.fetchall()]
finally:
    # Release the connection even when the query fails.
    cnxn.close()

# Build the frame once from all records instead of concatenating row by row:
# this is linear in the number of rows, lets pandas infer a dtype per column
# (the row-by-row version left every column as object), and still yields a
# correctly labelled empty frame when the query returns no rows.
pyodbc_data = pd.DataFrame.from_records(records, columns=sample_columns)

pyodbc_data.head()



In [None]:
## Extract data from SQL database using pyodbc
## this will throw a warning
# The query itself succeeds; the warning comes from handing pandas a raw
# pyodbc connection instead of a SQLAlchemy connectable.

cnxn = pyodbc.connect(connection_string)
try:
    data = pd.read_sql_query("SELECT * FROM SampleData", cnxn)
finally:
    # Close the connection once the rows are loaded so it is not leaked.
    cnxn.close()
data

In [None]:
## Export data to SQL using pyodbc
pyodbc_data = pd.DataFrame({
    "ParcelId" : [12345, 12346, 12347, 12348],
    "SaleDate" : ['01-01-2023', '02-01-2023', '03-01-2023', '04-01-2023'],
    "SalesPrice" : [100000, 150000, 200000, 250000]
})
# NOTE(review): the SaleDate strings are ambiguous (MM-DD vs DD-MM); how the
# server parses them depends on its settings. Confirm the intended dates.

# One parameter tuple per row, in the column order of the INSERT statement.
# itertuples yields plain Python scalars for each value.
insert_rows = list(
    pyodbc_data[["ParcelId", "SaleDate", "SalesPrice"]].itertuples(index=False, name=None)
)

cnxn = pyodbc.connect(connection_string)
try:
    cursor = cnxn.cursor()
    # Run the parameterised INSERT for every tuple in insert_rows in one call
    # rather than looping over DataFrame.iterrows().
    cursor.executemany(
        "INSERT INTO pyodbcTestData (ParcelId, SaleDate, SalesPrice) values (?, ?, ?)",
        insert_rows,
    )
    cnxn.commit()
    cursor.close()
finally:
    # Always release the connection, even when an insert fails.
    cnxn.close()

In [None]:
## Extract data from SQL database using sqlalchemy

# Wrap the raw ODBC connection string in a SQLAlchemy URL for the
# mssql+pyodbc dialect.
connection_url = URL.create("mssql+pyodbc", query={"odbc_connect": connection_string})

# create_engine is reached through the top-level `sa` import, so the cell no
# longer needs an import of its own.
engine = sa.create_engine(connection_url)

# The connection is opened for the query and released when the block exits.
with engine.begin() as conn:
    sqlalchemy_data = pd.read_sql_query(sa.text("SELECT * FROM SampleData;"), conn)

sqlalchemy_data.sample(5)

In [None]:
## Export data to SQL using sqlalchemy
connection_url = URL.create("mssql+pyodbc", query={"odbc_connect": connection_string})

# create_engine is reached through the top-level `sa` import, so the cell no
# longer needs an import of its own.
engine = sa.create_engine(connection_url)

sqlalchemy_data = pd.DataFrame({
    "ParcelId" : [12345, 12346, 12347, 12348],
    "EffAge" : [12, 23, 44, 67]
})
# if_exists='replace' drops and recreates the table when it already exists.
# NOTE(review): with the default index=True the frame's index is written as an
# extra column as well; pass index=False if only ParcelId and EffAge are wanted.
sqlalchemy_data.to_sql(name='TestData_Brad', if_exists='replace', con=engine)

In [None]:
## Export data to a pickle file
# Serialise the current `data` frame to a pickle file.
# NOTE(review): the backslash-separated path is only treated as a directory
# path on Windows.
data.to_pickle(path='..\\exported data\\PickleExport.pkl')

In [None]:
## Read pickle file
# Load the frame back from the pickle file. Unpickling can execute arbitrary
# code, so only read pickle files that come from a trusted source.

data = pd.read_pickle(filepath_or_buffer='..\\exported data\\PickleExport.pkl')
data.sample(n=5)

In [None]:
## Export to excel file
# Write the current `data` frame (index included) to an .xlsx workbook.
data.to_excel(excel_writer='..\\exported data\\excelExport.xlsx')

In [None]:
## Read excel file

# The export cell writes the index as the first sheet column. Reading it back
# with index_col=0 restores it as the index; without it the old index comes
# back as a stray 'Unnamed: 0' data column that every later export carries on.
data = pd.read_excel('..\\exported data\\excelExport.xlsx', index_col=0)
data.sample(5)

In [None]:
## Export data to csv
# Write the current `data` frame (index included) to a CSV file.
data.to_csv(path_or_buf='..\\exported data\\CsvExport.csv')

In [None]:
## Read csv file
# The export cell writes the index as the first CSV column. Reading it back
# with index_col=0 restores it as the index instead of adding an extra
# 'Unnamed: 0' data column.
data = pd.read_csv('..\\exported data\\CsvExport.csv', index_col=0)
data.sample(5)