In [1]:
import os

import numpy as np
import pandas as pd
import pyodbc
import pyreadstat  ##conda install conda-forge::pyreadstat
import sqlalchemy as sa
from sqlalchemy.engine import URL


In [11]:
## Connection settings for the SQL Server examples
# Credentials are read from environment variables so that real secrets never
# have to be typed into (and saved with) the notebook. When a variable is not
# set, the value falls back to the original placeholder.
server = os.environ.get('WORKSHOP_SQL_SERVER', '')
database = os.environ.get('WORKSHOP_SQL_DATABASE', 'Workshop')
username = os.environ.get('WORKSHOP_SQL_USERNAME', '')
password = os.environ.get('WORKSHOP_SQL_PASSWORD', '')

# ODBC connection string shared by the pyodbc and SQLAlchemy cells.
connection_string = "DRIVER={SQL Server};SERVER=%s;DATABASE=%s;UID=%s;PWD=%s" % (server, database, username, password)

In [None]:
## Extract data from SQL database using pyodbc
# Opens a raw pyodbc connection, loads the whole SampleData table into `df`,
# and always closes the connection again, even if the query fails.
# NOTE: pandas only officially supports SQLAlchemy connectables here and emits
# a UserWarning for other DBAPI connections.

query = "SELECT * FROM SampleData;"
cnxn = pyodbc.connect(connection_string)
try:
    df = pd.read_sql(query, cnxn)
finally:
    # Release the ODBC connection instead of leaving it open in the kernel.
    cnxn.close()
df.sample(5)


In [4]:
## Extract data from SQL database using sqlalchemy
# Builds a SQLAlchemy engine on top of the same ODBC connection string and
# loads the SampleData table into `data`.

# Pass the raw ODBC connection string through to pyodbc via `odbc_connect`.
connection_url = URL.create("mssql+pyodbc", query={"odbc_connect": connection_string})

# `sqlalchemy` is already imported as `sa` in the first cell, so no extra
# import is needed in the middle of the notebook.
engine = sa.create_engine(connection_url)

# The context manager returns the connection to the pool when the block exits.
with engine.begin() as conn:
    data = pd.read_sql_query(sa.text("SELECT * FROM SampleData;"), conn)

data.sample(5)

OperationalError: (pyodbc.OperationalError) ('08001', '[08001] [Microsoft][ODBC SQL Server Driver][DBNETLIB]SQL Server does not exist or access denied. (17) (SQLDriverConnect); [08001] [Microsoft][ODBC SQL Server Driver][DBNETLIB]ConnectionOpen (Connect()). (53)')
(Background on this error at: https://sqlalche.me/e/20/e3q8)

In [None]:
## Read SPSS sav file using read_spss
# Loads the SPSS file, fixes the column dtypes and indexes the result by
# ParcelId. The path uses forward slashes so it resolves on Windows, macOS and
# Linux alike.

spss_raw = pd.read_spss('../SPSS/Data/GISValTechSampleData.sav')

data = (
    spss_raw
    # Cast these columns to integers.
    .astype({
        'ParcelId': int,
        'SalesPrice': int,
        'Sqft': int,
        'LandSize': int,
        'GarageSize': int,
        'EffAge': int,
    })
    .assign(
        SaleDate=lambda frame: pd.to_datetime(frame['SaleDate']),
        # Quality is an ordered scale, from worst to best.
        Quality=lambda frame: pd.Categorical(frame['Quality'], ordered = True, categories = [
            'Poor', 'BelowAverage', 'Average', 'AboveAverage', 'Superior']),
        # NBHD is cast to int, then stored as an unordered categorical.
        NBHD=lambda frame: pd.Categorical(frame['NBHD'].astype(int)),
    )
    # Returns a new frame instead of mutating in place.
    .set_index('ParcelId')
)
data.head()

In [None]:
## Read pickle file
# NOTE: unpickling can execute arbitrary code, so only load pickle files that
# come from a trusted source.
# Forward slashes keep the relative path valid on Windows, macOS and Linux.

data = pd.read_pickle('../datafiles/SampleData.pkl')
data.sample(5)

In [None]:
## Read excel file
# Forward slashes keep the relative path valid on Windows, macOS and Linux.

data = pd.read_excel('../datafiles/SampleData.xlsx')
data.sample(5)

In [None]:
## Read csv file
# Forward slashes keep the relative path valid on Windows, macOS and Linux.
data = pd.read_csv('../datafiles/SampleData.csv')
data.sample(5)

In [None]:
## Export data to SQL using pyodbc
# Creates dbo.pyodbcTestData if it does not exist yet and inserts the first
# five rows of `data` into it.

# `.iloc` selects by position, so exactly five rows are taken regardless of
# what the index labels of `data` are (`.loc[:5]` is label-based and inclusive).
# Iterating the columns yields native Python values that pyodbc can bind.
rows = list(
    data[['ParcelId', 'SaleDate', 'SalesPrice']]
    .iloc[:5]
    .itertuples(index=False, name=None)
)

create_table_sql = '''
    IF OBJECT_ID (N'dbo.pyodbcTestData', N'U') IS NULL  
        CREATE TABLE dbo.pyodbcTestData(
            ParcelId INT,
            SaleDate DATETIME,
            SalesPrice INT
        );
'''
# Schema-qualified so the insert targets the same table the DDL creates.
insert_sql = "INSERT INTO dbo.pyodbcTestData (ParcelId, SaleDate, SalesPrice) values (?, ?, ?)"

cnxn = pyodbc.connect(connection_string)
try:
    cursor = cnxn.cursor()
    cursor.execute(create_table_sql)
    # executemany rejects an empty parameter sequence, so skip it when there
    # is nothing to insert.
    if rows:
        cursor.executemany(insert_sql, rows)
    cnxn.commit()
    cursor.close()
finally:
    # Always release the connection; closing it rolls back anything that was
    # not committed because of an error above.
    cnxn.close()


In [None]:
## Export data to SQL using sqlalchemy
# `if_exists='replace'` drops and recreates the test table, so this cell can
# be re-run; the default ('fail') raises ValueError once the table exists.
data.to_sql(name='alchemyTestData', con=engine, if_exists='replace')

In [None]:
## Export data to Excel
# Forward slashes keep the relative path valid on Windows, macOS and Linux.
data.to_excel('../datafiles/ExcelExport.xlsx')

In [None]:
## Export data to csv
# Forward slashes keep the relative path valid on Windows, macOS and Linux.
data.to_csv('../datafiles/CsvExport.csv')

In [None]:
## Export data to a pickle file
# Forward slashes keep the relative path valid on Windows, macOS and Linux.
data.to_pickle('../datafiles/PickleExport.pkl')