In [1]:
import sqlite3
import pandas as pd

# Join the expenditure fact table to its date, named-cost and identifier dimensions
query = '''
SELECT 
    DimDate.Year, DimDate.Quarter,
    DimNamedCost.NamedCost,
    DimIdentifier.COICOP, DimIdentifier.CDID, DimIdentifier.Source, DimIdentifier.Websource,
    FctExpenditure.ExpenditureValue, FctExpenditure.GroupTotal
FROM FctExpenditure
JOIN DimDate ON FctExpenditure.DateID = DimDate.DateID
JOIN DimNamedCost ON FctExpenditure.NamedCostID = DimNamedCost.NamedCostID
JOIN DimIdentifier ON FctExpenditure.IdentifierID = DimIdentifier.IdentifierID
'''

# Connect to the SQLite database
conn = sqlite3.connect("C:/Users/l_cla/Documents/000_Project/000_000_datastored/200_structured/dataset.db")
try:
    cursor = conn.cursor()

    # Execute the query and fetch all records
    records = cursor.execute(query).fetchall()

    # Column names are read from the cursor's description of the executed query
    column_names = [desc[0] for desc in cursor.description]
finally:
    # Release the database handle even when the query fails; the original cell
    # never closed this connection.
    conn.close()

# Convert the fetched records into a Pandas DataFrame
records_df = pd.DataFrame(records, columns=column_names)

# Replace 'ExpenditureValue' with display strings: two decimal places and thousands separators.
# After this line the column holds text, not numbers.
records_df['ExpenditureValue'] = records_df['ExpenditureValue'].map('{:,.2f}'.format)

# Display the first few records of the DataFrame (remove `.head()` to display all records)
records_df.head()


Unnamed: 0,Year,Quarter,NamedCost,COICOP,CDID,Source,Websource,ExpenditureValue,GroupTotal
0,1997,Q1,Totalexpenditure,NAT0,ABPB,0GSCN,https://www.ons.gov.uk/search?q=ABPB,144887000000.0,1
1,1997,Q2,Totalexpenditure,NAT0,ABPB,0GSCN,https://www.ons.gov.uk/search?q=ABPB,150474000000.0,1
2,1997,Q3,Totalexpenditure,NAT0,ABPB,0GSCN,https://www.ons.gov.uk/search?q=ABPB,155809000000.0,1
3,1997,Q4,Totalexpenditure,NAT0,ABPB,0GSCN,https://www.ons.gov.uk/search?q=ABPB,159193000000.0,1
4,1998,Q1,Totalexpenditure,NAT0,ABPB,0GSCN,https://www.ons.gov.uk/search?q=ABPB,153347000000.0,1


In [2]:
import sqlite3
import pandas as pd

# SQL query that merges the fact table with its three dimension tables
query = """
SELECT 
    d.Year, d.Quarter, 
    nc.NamedCost, 
    i.COICOP, i.CDID, i.Source, i.Websource,
    ef.ExpenditureValue, ef.GroupTotal
FROM 
    FctExpenditure ef
JOIN DimDate d ON ef.DateID = d.DateID
JOIN DimNamedCost nc ON ef.NamedCostID = nc.NamedCostID
JOIN DimIdentifier i ON ef.IdentifierID = i.IdentifierID
"""

# Connect to the SQLite database
conn = sqlite3.connect("C:/Users/l_cla/Documents/000_Project/000_000_datastored/200_structured/dataset.db")
try:
    # Convert the SQL query result into a DataFrame
    df = pd.read_sql_query(query, conn)
finally:
    # Close the connection whether or not the query succeeded, so a failed
    # read does not leave the database file open.
    conn.close()

# Print the DataFrame
print(df)

# Save the DataFrame to a CSV file (header row included, index column omitted)
df.to_csv("C:/Users/l_cla/Documents/000_Project/000_000_datastored/300_curated/alldata.csv", header=True, index=False)


       Year Quarter                   NamedCost COICOP  CDID  Source  \
0      1997      Q1            Totalexpenditure   NAT0  ABPB   0GSCN   
1      1997      Q2            Totalexpenditure   NAT0  ABPB   0GSCN   
2      1997      Q3            Totalexpenditure   NAT0  ABPB   0GSCN   
3      1997      Q4            Totalexpenditure   NAT0  ABPB   0GSCN   
4      1998      Q1            Totalexpenditure   NAT0  ABPB   0GSCN   
...     ...     ...                         ...    ...   ...     ...   
32235  2021      Q4  UKtouristexpenditureabroad  TOUR2  ABTA  TOURCN   
32236  2022      Q1  UKtouristexpenditureabroad  TOUR2  ABTA  TOURCN   
32237  2022      Q2  UKtouristexpenditureabroad  TOUR2  ABTA  TOURCN   
32238  2022      Q3  UKtouristexpenditureabroad  TOUR2  ABTA  TOURCN   
32239  2022      Q4  UKtouristexpenditureabroad  TOUR2  ABTA  TOURCN   

                                  Websource  ExpenditureValue  GroupTotal  
0      https://www.ons.gov.uk/search?q=ABPB      1.448870e+

In [3]:
import pandas as pd
import plotly.express as px

def displaydata(source, data=None, output_dir="C:/Users/l_cla/Documents/000_Project/000_000_datastored/300_curated"):
    """Export a bar chart (HTML) and a CSV extract for one value of the 'Source' column.

    Parameters
    ----------
    source : str
        Value matched against the 'Source' column; it is also used as the
        prefix of both output file names.
    data : pandas.DataFrame, optional
        Frame to filter. It must provide the 'Source', 'Year', 'Quarter',
        'ExpenditureValue' and 'NamedCost' columns. When omitted, the
        notebook-level ``df`` is used, which is what the original
        single-argument version always did.
    output_dir : str, optional
        Directory that receives ``{source}plot.html`` and ``{source}data.csv``.
        Defaults to the curated-data folder the original version hard-coded.

    Returns
    -------
    None
        The function only writes files; nothing is displayed or returned.
    """
    frame = df if data is None else data

    # Work on an independent copy so adding the helper column never touches the input frame
    df_filtered = frame[frame['Source'] == source].copy(deep=True)

    # Combine Year and Quarter into a single x-axis label, e.g. "1997 Q1"
    df_filtered['Year_Quarter'] = df_filtered['Year'].astype(str) + ' ' + df_filtered['Quarter']

    # Interactive bar chart of expenditure per period, coloured by NamedCost
    fig = px.bar(
        df_filtered,
        x='Year_Quarter',
        y='ExpenditureValue',
        color='NamedCost',
        title='Expenditure Value by Year and Quarter',
        labels={'ExpenditureValue': 'Expenditure Value'},
        hover_data=['NamedCost'],
    )

    # Save the plot as an HTML file
    fig.write_html(f"{output_dir}/{source}plot.html")

    # Save the filtered data (including the Year_Quarter helper column) to a CSV file
    df_filtered.to_csv(f"{output_dir}/{source}data.csv", header=True, index=False)

    # To display the plot in the notebook or a separate window, uncomment:
    # fig.show()

# Call the function with a specific source value
#displaydata('0GSCN')

In [4]:
%%javascript
// Set the output area's auto-scroll threshold to 9999.
// NOTE(review): presumably meant to stop long cell outputs from being collapsed
// into a scrolling box; this relies on a global `IPython` object being exposed
// by the notebook front end in use — confirm it exists there.
IPython.OutputArea.auto_scroll_threshold = 9999

<IPython.core.display.Javascript object>

In [5]:
# Collect the distinct values present in the 'Source' column
unique_sources = pd.unique(df['Source'])

# Run displaydata once per distinct source value
for source in unique_sources:
    displaydata(source)


In [6]:
import pandas as pd
import plotly.express as px
import dash
from dash import dcc, html

def displaydata(df):
    """Serve a Dash app showing one expenditure bar chart per 'Source' value, one per tab.

    Note: this definition reuses the name of the per-source
    ``displaydata(source)`` helper defined earlier in the notebook, so once
    this cell has run the earlier helper is no longer reachable under that name.

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide the 'Source', 'Year', 'Quarter', 'ExpenditureValue' and
        'NamedCost' columns.

    Returns
    -------
    None
        The call starts the Dash server and does not return a value.
    """
    unique_sources = df['Source'].unique()

    # Create a Dash app
    app = dash.Dash(__name__)

    # One dcc.Tab per source, each wrapping that source's figure
    tabs_content = []

    for source in unique_sources:
        # Independent copy so the helper column is not written onto the caller's frame
        df_filtered = df[df['Source'] == source].copy(deep=True)
        df_filtered['Year_Quarter'] = df_filtered['Year'].astype(str) + ' ' + df_filtered['Quarter']

        # Create a bar chart for the current source
        fig = px.bar(df_filtered, x='Year_Quarter', y='ExpenditureValue', color='NamedCost',
                     labels={'ExpenditureValue': 'Expenditure Value'}, hover_data=['NamedCost'])

        # Append the figure to the tabs content
        tabs_content.append(dcc.Tab(label=source, children=dcc.Graph(figure=fig)))

    # Define the app layout
    app.layout = html.Div([
        html.H1("Expenditure Value by Year and Quarter for All Sources"),
        dcc.Tabs(tabs_content)
    ])

    # `run_server` is a deprecated alias of `run` and is rejected by newer Dash
    # releases; prefer `run` when the installed version has it and fall back to
    # `run_server` on older installations.
    launch = getattr(app, "run", None)
    if launch is None:
        launch = app.run_server

    # Reloader stays off because the app is started from inside Jupyter
    launch(debug=True, use_reloader=False)

# Relies on `df` having been created by an earlier cell
displaydata(df)
