In [None]:
# Build a small sample table and save it as paste.txt.
# The date is separated from the other fields by a pipe, while the quantity
# and price share one comma-separated column.
sample_rows = [
    "Business Date      | On Loan Quantity,Security Price",
    "2023-01-01      | 1000,50.50",
    "2023-01-02      | 1200,51.00",
    "2023-01-03      | 1100,50.80",
    "2023-01-04      | 1300,51.20",
    "2023-01-05      | 1500,51.50",
]
# Every row, including the last one, is newline-terminated
sample_data = "".join(row + "\n" for row in sample_rows)

with open('paste.txt', mode='w') as out_file:
    out_file.write(sample_data)

print("Dummy 'paste.txt' file created.")

In [None]:
# Load paste.txt, rename columns appropriately and create chart
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.ticker import FuncFormatter

# Parse paste.txt: the date column is separated from the rest by a pipe with
# optional surrounding whitespace. The pattern is a raw string so that the
# regex escapes (\s, \|) are not interpreted as (invalid) Python string
# escapes, which newer Python versions warn about.
paste_df = pd.read_csv('paste.txt', sep=r'\s*\|\s*', engine='python')

# Clean column names first, so the exact-name lookup below cannot fail on
# stray whitespace left at the start or end of the header line
paste_df.columns = [c.strip() for c in paste_df.columns]

# Split the second column into 'On Loan Quantity' and 'Security Price'
combined_col = 'On Loan Quantity,Security Price'
paste_df[['On Loan Quantity', 'Security Price']] = paste_df[combined_col].str.split(',', expand=True)

# Drop the original combined column
paste_df = paste_df.drop(columns=[combined_col])

# Names of the columns used by the chart code
col_date = 'Business Date'
col_qty = 'On Loan Quantity'
col_price = 'Security Price'

# Convert data types (the split above yields strings)
paste_df[col_date] = pd.to_datetime(paste_df[col_date])
paste_df[col_qty] = pd.to_numeric(paste_df[col_qty])
paste_df[col_price] = pd.to_numeric(paste_df[col_price])

# Order rows by date before plotting
paste_df = paste_df.sort_values(col_date)

# Colours used by this chart
border_color = '#048dd2'
loan_color = '#f57600'
text_gray = '#374151'
accent_blue = '#006db7'
spine_blue = '#BDDFFF'

# Figure with a thick coloured frame
fig, ax1 = plt.subplots(figsize=(12, 7))
fig.patch.set_edgecolor(border_color)
fig.patch.set_linewidth(16)

# Left axis: price per share
ax1.plot(
    paste_df[col_date],
    paste_df[col_price],
    color=border_color,
    linewidth=3.0,
    label='Price per Share (USD)',
)
ax1.set_ylabel('Price per Share (USD)', fontsize=12, fontweight='bold', color=text_gray, labelpad=8)
ax1.tick_params(axis='y', labelcolor=border_color, labelsize=10)
for tick_label in ax1.get_yticklabels():
    tick_label.set_fontweight('bold')
ax1.set_xlim(left=paste_df[col_date].min())
ax1.set_xmargin(0)

# Right axis: loan quantity, scaled to millions of shares
ax2 = ax1.twinx()
loan_in_millions = paste_df[col_qty] / 1e6
ax2.plot(
    paste_df[col_date],
    loan_in_millions,
    color=loan_color,
    linewidth=2.5,
    label='Loan Quantity (MM shares)',
)
ax2.set_ylabel(
    'Loan Quantity (MM shares)',
    fontsize=12,
    fontweight='bold',
    color=text_gray,
    rotation=270,
    labelpad=15,
    va='bottom',
)
ax2.tick_params(axis='y', labelcolor=loan_color, labelsize=10)
ax2.yaxis.set_major_formatter(FuncFormatter(lambda value, _pos: f'{value:.2f}'))
for tick_label in ax2.get_yticklabels():
    tick_label.set_fontweight('bold')

plt.title('Default Chart', fontsize=16, fontweight='bold', color=accent_blue)

# One combined legend holding the entries of both axes
price_handles, price_labels = ax1.get_legend_handles_labels()
loan_handles, loan_labels = ax2.get_legend_handles_labels()
leg = plt.legend(
    price_handles + loan_handles,
    price_labels + loan_labels,
    loc='upper left',
    frameon=True,
    fontsize=12,
)
legend_frame = leg.get_frame()
legend_frame.set_facecolor('white')
legend_frame.set_edgecolor('none')
for legend_text in leg.get_texts():
    legend_text.set_fontweight('bold')

# Same light spine styling on both axes
for axis in (ax1, ax2):
    for spine in axis.spines.values():
        spine.set_edgecolor(spine_blue)
        spine.set_linewidth(1.5)

plt.annotate(
    'Source: EquiLend Data & Analytics',
    (0.975, 0.025),
    xycoords='figure fraction',
    ha='right',
    fontsize=12,
    fontweight='bold',
    color=accent_blue,
)
plt.tight_layout()
plt.show()

# Task
Create dual-axis charts for Kohl's and OpenDoor data, visualizing price against borrow quantity, borrow demand (utilization), or borrow cost using the datasets from "/content/kohls_kss.csv" and "/content/opendoor.csv".

## Load the data

### Subtask:
Load the data from the provided CSV files into pandas DataFrames.


**Reasoning**:
Load the data from the two CSV files into pandas DataFrames as instructed.



In [None]:
# Read each company's CSV export into its own DataFrame
kohls_csv_path = '/content/kohls_kss.csv'
kohls_df = pd.read_csv(kohls_csv_path)

opendoor_csv_path = '/content/opendoor.csv'
opendoor_df = pd.read_csv(opendoor_csv_path)

**Reasoning**:
The previous command failed because pandas was not imported. I need to import pandas before reading the CSV files.



In [None]:
import pandas as pd

# Load the Kohl's data, then the OpenDoor data, from their CSV files
kohls_df = pd.read_csv(filepath_or_buffer='/content/kohls_kss.csv')
opendoor_df = pd.read_csv(filepath_or_buffer='/content/opendoor.csv')

## Inspect and preprocess data

### Subtask:
Examine the dataframes to understand their structure, column names, and data types. Perform necessary data cleaning and preprocessing, such as converting date columns to datetime objects and numeric columns to appropriate types.


**Reasoning**:
Display the head and info of both dataframes to understand their structure and data types.



In [None]:
# Show the first rows of each DataFrame ...
for frame in (kohls_df, opendoor_df):
    display(frame.head())

# ... then print the info() summary of each
for frame in (kohls_df, opendoor_df):
    frame.info()

**Reasoning**:
Convert the 'Business Date' column to datetime objects and numeric columns to appropriate types in both dataframes, then display the info again to confirm the conversions.



In [None]:
# Parse the date column of both frames
for frame in (kohls_df, opendoor_df):
    frame['Business Date'] = pd.to_datetime(frame['Business Date'])

# Columns to convert to numeric in each frame
numeric_cols_kohls = [
    'Security Price',
    'Active Utilization',
    'Squeeze Score',
    'Utilization',
    '30 Day Avg Volume',
    'Fee All (bps)',
    'On Loan Quantity',
    'Utilization.1',
    'Total Lendable Quantity',
]
numeric_cols_opendoor = [
    'Security Price',
    'Active Utilization',
    'Squeeze Score',
    'Utilization',
    'Broker-to-Broker Loan Quantity',
    '30 Day Avg Volume',
    'Fee 1 (bps)',
    'On Loan Quantity',
    'Utilization.1',
    'Total Lendable Quantity',
    'On Loan Quantity.1',
    'Total Lendable Quantity.1',
]

# errors='coerce' turns values that cannot be parsed into NaN instead of raising
for frame, numeric_cols in (
    (kohls_df, numeric_cols_kohls),
    (opendoor_df, numeric_cols_opendoor),
):
    for col in numeric_cols:
        frame[col] = pd.to_numeric(frame[col], errors='coerce')

# Print the info() summaries again to confirm the conversions
for frame in (kohls_df, opendoor_df):
    frame.info()

## Merge or combine data (if necessary)

### Subtask:
Determine if merging or combining data from both datasets is necessary for the analysis and perform the merge if required.


**Reasoning**:
Assess the need for merging and, if necessary, merge the dataframes on 'Business Date'. Then display the head and info of the resulting dataframe.



In [None]:
# For the planned analysis (creating dual-axis charts for each company),
# merging the dataframes is not necessary as the visualizations will be
# generated independently for each company's data. The goal is not to
# compare metrics on the same date across companies within a single chart
# or perform a joint analysis requiring combined data.
# Therefore, no merge operation is required.

## Identify relevant columns

### Subtask:
Identify the columns corresponding to date, price, and borrow metrics (quantity, utilization, or cost) in each dataset.


**Reasoning**:
Identify and store the column names for date, price, and borrow metrics for both dataframes as instructed.



In [None]:
# Column names shared by kohls_df and opendoor_df.
# Where a frame also has a duplicate column with a '.1' suffix
# (Utilization in both frames, On Loan Quantity in opendoor_df),
# the column without the suffix is the one chosen.
kohls_date_col = opendoor_date_col = 'Business Date'
kohls_price_col = opendoor_price_col = 'Security Price'
kohls_qty_col = opendoor_qty_col = 'On Loan Quantity'
kohls_utilization_col = opendoor_utilization_col = 'Utilization'

# The fee column is named differently in each dataset
kohls_fee_col = 'Fee All (bps)'
opendoor_fee_col = 'Fee 1 (bps)'

## Create dual-axis charts

### Subtask:
For each dataset, generate a dual-axis chart with date on the x-axis, price on one y-axis, and a chosen borrow metric on the other y-axis.


**Reasoning**:
Generate the three dual-axis charts for the Kohl's data as per the instructions.



In [None]:
import matplotlib.pyplot as plt

# Charts 1-3 for Kohl's: security price against each borrow metric in turn.
# Each entry is (borrow column, line colour, legend label / title suffix).
kohls_chart_specs = [
    (kohls_qty_col, 'tab:red', 'On Loan Quantity'),
    (kohls_utilization_col, 'tab:green', 'Utilization'),
    (kohls_fee_col, 'tab:orange', 'Fee All (bps)'),
]

for metric_col, metric_color, metric_label in kohls_chart_specs:
    fig, ax1 = plt.subplots(figsize=(12, 7))

    # Left axis: security price
    ax1.plot(
        kohls_df[kohls_date_col],
        kohls_df[kohls_price_col],
        color='tab:blue',
        label='Security Price',
    )
    ax1.set_xlabel(kohls_date_col)
    ax1.set_ylabel(kohls_price_col, color='tab:blue')
    ax1.tick_params(axis='y', labelcolor='tab:blue')

    # Right axis: the borrow metric for this chart
    ax2 = ax1.twinx()
    ax2.plot(
        kohls_df[kohls_date_col],
        kohls_df[metric_col],
        color=metric_color,
        label=metric_label,
    )
    ax2.set_ylabel(metric_col, color=metric_color)
    ax2.tick_params(axis='y', labelcolor=metric_color)

    fig.suptitle(f"Kohl's: Security Price vs {metric_label}", fontsize=16)
    fig.legend(loc="upper left", bbox_to_anchor=(0.1, 0.9))
    plt.show()

**Reasoning**:
Generate the three dual-axis charts for the OpenDoor data as per the instructions.



In [None]:
# Charts 4-6 for OpenDoor: security price against each borrow metric in turn.
# Each entry is (borrow column, line colour, legend label / title suffix).
opendoor_chart_specs = [
    (opendoor_qty_col, 'tab:red', 'On Loan Quantity'),
    (opendoor_utilization_col, 'tab:green', 'Utilization'),
    (opendoor_fee_col, 'tab:orange', 'Fee 1 (bps)'),
]

for metric_col, metric_color, metric_label in opendoor_chart_specs:
    fig, ax1 = plt.subplots(figsize=(12, 7))

    # Left axis: security price
    ax1.plot(
        opendoor_df[opendoor_date_col],
        opendoor_df[opendoor_price_col],
        color='tab:blue',
        label='Security Price',
    )
    ax1.set_xlabel(opendoor_date_col)
    ax1.set_ylabel(opendoor_price_col, color='tab:blue')
    ax1.tick_params(axis='y', labelcolor='tab:blue')

    # Right axis: the borrow metric for this chart
    ax2 = ax1.twinx()
    ax2.plot(
        opendoor_df[opendoor_date_col],
        opendoor_df[metric_col],
        color=metric_color,
        label=metric_label,
    )
    ax2.set_ylabel(metric_col, color=metric_color)
    ax2.tick_params(axis='y', labelcolor=metric_color)

    fig.suptitle(f"OpenDoor: Security Price vs {metric_label}", fontsize=16)
    fig.legend(loc="upper left", bbox_to_anchor=(0.1, 0.9))
    plt.show()

## Customize charts

### Subtask:
Enhance the charts with appropriate titles, labels, legends, and visual styles for better readability.


**Reasoning**:
Enhance the previously generated charts by adding titles, axis labels, legends, and visual styles for better readability and consistency across all six charts, ensuring the customizations identified in the previous steps are incorporated.



In [None]:
import matplotlib.pyplot as plt
from matplotlib.ticker import FuncFormatter

# Shared look-and-feel settings for the styled charts

# Line colours, one per plotted series
price_color = '#048dd2'        # EquiLend blue
loan_qty_color = '#f57600'     # EquiLend orange
utilization_color = '#006db7'  # EquiLend dark blue
fee_color = '#8c564b'          # A distinct color for fees

# Colour of the frame drawn around the figure
border_color = '#BDDFFF'       # Light blue border

# Line widths for the price line and the borrow-metric line
line_width_price, line_width_borrow = 3.0, 2.5

# Font sizes for the title, axis labels and tick labels
title_fontsize, label_fontsize, tick_label_fontsize = 16, 12, 10

# Title, axis labels and tick labels all use the same weight
title_fontweight = label_fontweight = tick_label_fontweight = 'bold'

# Tick formatter: express a raw quantity in millions
def million_formatter(x, pos):
    """Return *x* divided by one million, with two decimals and an 'M' suffix.

    *pos* is part of the (value, position) tick-formatter signature and is
    not used.
    """
    millions = x / 1e6
    return '{:.2f}M'.format(millions)

# Function to format percentage for utilization
def percent_formatter(x, pos):
    """Return *x* followed by a percent sign, without rounding away fractions.

    Whole values render without decimals ('50%'). Fractional values keep
    their digits ('2.5%'), so tick marks placed at non-integer positions do
    not collapse into duplicate or misleading labels (with a fixed '.0f'
    format, 3.5 and 4.0 would both read '4%').

    Args:
        x: Numeric tick value, already expressed in percent units.
        pos: Tick position; part of the (value, position) tick-formatter
            signature and not used.

    Returns:
        The formatted label string.
    """
    # Up to 10 significant digits: drops trailing zeros and float noise while
    # keeping ordinary whole numbers out of exponent notation.
    return f'{x:.10g}%'

# Tick formatter: label a fee value in basis points
def bps_formatter(x, pos):
    """Return *x* with two decimals followed by ' bps'.

    *pos* is part of the (value, position) tick-formatter signature and is
    not used.
    """
    amount = format(x, '.2f')
    return amount + ' bps'

# Function to create and style a dual-axis chart
def create_dual_axis_chart(df, date_col, price_col, borrow_col, borrow_label, borrow_color, title, borrow_formatter=None):
    """Draw and show a styled dual-axis line chart of price against a borrow metric.

    The price series is plotted on the left y-axis and the borrow metric on a
    twinned right y-axis, both against ``df[date_col]``. Colours, line widths
    and font settings are read from the module-level style variables
    (``price_color``, ``border_color``, ``line_width_price``,
    ``line_width_borrow``, the ``*_fontsize`` and ``*_fontweight`` values), so
    those must be defined before this function is called.

    Args:
        df: Table indexable by column name (a pandas DataFrame at the call
            sites in this notebook).
        date_col: Name of the column plotted on the x-axis; also used as the
            x-axis label.
        price_col: Name of the column plotted on the left y-axis.
        borrow_col: Name of the column plotted on the right y-axis.
        borrow_label: Legend entry and right y-axis label for the borrow series.
        borrow_color: Colour of the borrow line, its axis label and tick labels.
        title: Chart title.
        borrow_formatter: Optional callable ``(value, pos) -> str`` used to
            format the right y-axis tick labels. When omitted, the default
            tick formatting is kept.

    Returns:
        None. The figure is displayed via ``plt.show()``.
    """
    fig, ax1 = plt.subplots(figsize=(12, 7))

    # Plot Price
    ax1.plot(df[date_col], df[price_col], color=price_color, linewidth=line_width_price, label='Security Price (USD)')
    ax1.set_ylabel('Security Price (USD)', fontsize=label_fontsize, fontweight=label_fontweight, color=price_color, labelpad=8)
    ax1.tick_params(axis='y', labelcolor=price_color, labelsize=tick_label_fontsize)
    for lab in ax1.get_yticklabels():
        lab.set_fontweight(tick_label_fontweight)
    ax1.set_xmargin(0)

    # Create a twin axis for borrow metric
    ax2 = ax1.twinx()
    ax2.plot(df[date_col], df[borrow_col], color=borrow_color, linewidth=line_width_borrow, label=borrow_label)
    ax2.set_ylabel(borrow_label, fontsize=label_fontsize, fontweight=label_fontweight, color=borrow_color, rotation=270, labelpad=15, va='bottom')
    ax2.tick_params(axis='y', labelcolor=borrow_color, labelsize=tick_label_fontsize)
    if borrow_formatter is not None:
        ax2.yaxis.set_major_formatter(FuncFormatter(borrow_formatter))
    for lab in ax2.get_yticklabels():
        lab.set_fontweight(tick_label_fontweight)

    # Set X-axis label (Business Date) only on the primary axis
    ax1.set_xlabel(date_col, fontsize=label_fontsize, fontweight=label_fontweight, color='#374151', labelpad=8)

    # Set the title on the twin axis explicitly rather than relying on
    # pyplot's notion of the "current" axes
    ax2.set_title(title, fontsize=title_fontsize, fontweight=title_fontweight, color='#006db7')

    # Build a single legend holding the entries of both axes
    lines, labels = ax1.get_legend_handles_labels()
    lines2, labels2 = ax2.get_legend_handles_labels()
    leg = ax2.legend(lines + lines2, labels + labels2, loc='upper left', frameon=True, fontsize=label_fontsize)
    legend_frame = leg.get_frame()
    legend_frame.set_facecolor('white')
    legend_frame.set_edgecolor('none')
    for txt in leg.get_texts():
        txt.set_fontweight(label_fontweight)

    # Add border and spine styling
    fig.patch.set_edgecolor(border_color)
    fig.patch.set_linewidth(16)
    for axis in (ax1, ax2):
        for spine in axis.spines.values():
            spine.set_edgecolor('#BDDFFF')
            spine.set_linewidth(1.5)

    # Add source annotation, positioned as a fraction of the whole figure
    ax2.annotate('Source: EquiLend Data & Analytics', (0.975, 0.025), xycoords='figure fraction', ha='right', fontsize=12, fontweight='bold', color='#006db7')

    fig.tight_layout()
    plt.show()

# Kohl's Charts: quantity, utilization and fee, in that order.
# Each entry is (borrow column, label / title suffix, colour, tick formatter).
for metric_col, metric_label, metric_color, metric_formatter in (
    (kohls_qty_col, 'On Loan Quantity', loan_qty_color, million_formatter),
    (kohls_utilization_col, 'Utilization', utilization_color, percent_formatter),
    (kohls_fee_col, 'Fee All (bps)', fee_color, bps_formatter),
):
    create_dual_axis_chart(
        kohls_df,
        kohls_date_col,
        kohls_price_col,
        metric_col,
        metric_label,
        metric_color,
        f"Kohl's: Security Price vs {metric_label}",
        borrow_formatter=metric_formatter,
    )

# OpenDoor Charts: same three metrics, with OpenDoor's own fee column
for metric_col, metric_label, metric_color, metric_formatter in (
    (opendoor_qty_col, 'On Loan Quantity', loan_qty_color, million_formatter),
    (opendoor_utilization_col, 'Utilization', utilization_color, percent_formatter),
    (opendoor_fee_col, 'Fee 1 (bps)', fee_color, bps_formatter),
):
    create_dual_axis_chart(
        opendoor_df,
        opendoor_date_col,
        opendoor_price_col,
        metric_col,
        metric_label,
        metric_color,
        f"OpenDoor: Security Price vs {metric_label}",
        borrow_formatter=metric_formatter,
    )

## Display charts

### Subtask:
Display the generated charts.


**Reasoning**:
Review the previous code to confirm that `plt.show()` was called after generating each chart to ensure they are displayed.



In [None]:
# Review of the previous code indicates that plt.show() was called
# after the creation and customization of each chart within the
# create_dual_axis_chart function.
# Therefore, no additional code is needed to display the charts.
# The subtask is complete.

## Summary:

### Data Analysis Key Findings

*   Six dual-axis charts were successfully generated and displayed, three for Kohl's (KSS) and three for OpenDoor (OPEN).
*   For Kohl's, charts visualize the relationship between Security Price and On Loan Quantity, Security Price and Utilization, and Security Price and Fee All (bps).
*   For OpenDoor, charts visualize the relationship between Security Price and On Loan Quantity, Security Price and Utilization, and Security Price and Fee 1 (bps).
*   Each chart uses 'Business Date' on the x-axis, 'Security Price' on the primary y-axis, and the chosen borrow metric on the secondary y-axis.
*   The charts are customized with clear titles, axis labels (including units and formatters for Millions, percentage, and basis points), legends, consistent color schemes, line weights, and a source annotation.

### Insights or Next Steps

*   Analyze the generated charts to identify any visual correlations or patterns between security price movements and changes in borrow quantity, utilization, and fees for both Kohl's and OpenDoor.
*   Consider performing a statistical analysis (e.g., correlation coefficient) to quantify the relationship between price and the borrow metrics.
