## Importing Data: Working with CSV Files

In [5]:
import pandas as pd

# Load the model log CSV into a DataFrame, letting pandas infer each column's dtype.
df = pd.read_csv(filepath_or_buffer='model_logs_100.csv')

In [6]:
# Preview the first five rows to sanity-check the load.
df.head(n=5)

Unnamed: 0,Date,Prompt,Response Time (ms),Tokens Generated
0,2023-01-01,Explain quantum computing in simple terms,70,285
1,2023-01-02,Write a sci-fi short story set in 2050,93,401
2,2023-01-03,Generate marketing copy for a new tech product,78,377
3,2023-01-04,Generate marketing copy for a new tech product,120,372
4,2023-01-05,Create a recipe using plant-based ingredients,84,316


In [7]:
# Print a structural summary: column names, non-null counts, dtypes and memory usage.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 100 entries, 0 to 99
Data columns (total 4 columns):
 #   Column              Non-Null Count  Dtype 
---  ------              --------------  ----- 
 0   Date                100 non-null    object
 1   Prompt              100 non-null    object
 2   Response Time (ms)  100 non-null    int64 
 3   Tokens Generated    100 non-null    int64 
dtypes: int64(2), object(2)
memory usage: 3.3+ KB


In [8]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric columns.
df.describe()

Unnamed: 0,Response Time (ms),Tokens Generated
count,100.0,100.0
mean,76.33,339.97
std,26.477379,86.329935
min,30.0,200.0
25%,53.75,262.5
50%,74.5,335.0
75%,101.25,406.0
max,120.0,494.0


In [9]:
# Convert the Date column from strings (object dtype) to datetime64 values.
# Note: this overwrites the column on the existing frame rather than creating a new one.
df['Date'] = pd.to_datetime(df['Date'])

In [10]:
# Check the per-column dtypes after converting the Date column.
df.dtypes

Date                  datetime64[ns]
Prompt                        object
Response Time (ms)             int64
Tokens Generated               int64
dtype: object

In [17]:
# Alternative: parse the Date column while loading, so no separate conversion step is needed.
df = pd.read_csv(
    'model_logs_100.csv',
    parse_dates=['Date'],
)

In [12]:
# Check the per-column dtypes after loading with parse_dates=['Date'].
df.dtypes

Date                  datetime64[ns]
Prompt                        object
Response Time (ms)             int64
Tokens Generated               int64
dtype: object

In [18]:
# Boolean filter: keep only the rows whose response time is strictly above 50 ms.
mask = df['Response Time (ms)'].gt(50)
slow_responses = df.loc[mask]
slow_responses

Unnamed: 0,Date,Prompt,Response Time (ms),Tokens Generated
0,2023-01-01,Explain quantum computing in simple terms,70,285
1,2023-01-02,Write a sci-fi short story set in 2050,93,401
2,2023-01-03,Generate marketing copy for a new tech product,78,377
3,2023-01-04,Generate marketing copy for a new tech product,120,372
4,2023-01-05,Create a recipe using plant-based ingredients,84,316
...,...,...,...,...
94,2023-04-05,Generate marketing copy for a new tech product,81,355
95,2023-04-06,Generate marketing copy for a new tech product,120,430
96,2023-04-07,Outline the ethical considerations of AI,89,402
97,2023-04-08,Explain quantum computing in simple terms,52,462


In [19]:
# Persist the filtered rows; index=False leaves the row index out of the file.
slow_responses.to_csv(
    path_or_buf='slow_responses.csv',
    index=False,
)
print("Filtered data saved to 'slow_responses.csv'!")

Filtered data saved to 'slow_responses.csv'!


In [20]:
import pandas as pd

# Reload the raw CSV without parse_dates, so Date is read back as plain strings.
df = pd.read_csv(filepath_or_buffer='model_logs_100.csv')
df

Unnamed: 0,Date,Prompt,Response Time (ms),Tokens Generated
0,2023-01-01,Explain quantum computing in simple terms,70,285
1,2023-01-02,Write a sci-fi short story set in 2050,93,401
2,2023-01-03,Generate marketing copy for a new tech product,78,377
3,2023-01-04,Generate marketing copy for a new tech product,120,372
4,2023-01-05,Create a recipe using plant-based ingredients,84,316
...,...,...,...,...
95,2023-04-06,Generate marketing copy for a new tech product,120,430
96,2023-04-07,Outline the ethical considerations of AI,89,402
97,2023-04-08,Explain quantum computing in simple terms,52,462
98,2023-04-09,Describe the impact of AI on healthcare,93,254


## Exporting to Excel

In [23]:
pip install openpyxl -q


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m25.1.1[0m[39;49m -> [0m[32;49m25.2[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m
Note: you may need to restart the kernel to use updated packages.


In [24]:
# Write the frame to an Excel workbook; index=False leaves the row index out of the sheet.
df.to_excel(
    excel_writer='data.xlsx',
    index=False,
)
print("Data exported to 'data.xlsx'")

Data exported to 'data.xlsx'


## Exporting to JSON

In [25]:
# Column-oriented JSON: one top-level key per column, each mapping row index -> value.
df.to_json(path_or_buf='data.json', orient='columns')

In [26]:
# JSON Lines export: each row becomes one JSON object on its own line.
df.to_json(
    path_or_buf='chat_data.jsonl',
    orient='records',
    lines=True,
)

## Exporting to SQL

In [28]:
import sqlite3

# Write the DataFrame to a table inside the local SQLite file 'chat_data.db'.
# The table is named 'chat_data' rather than 'chat_data.db': a dot in a table
# name forces every later query to quote the identifier.
# if_exists='replace' drops and recreates the table on each run, so the cell
# can be re-executed safely; index=False leaves the row index out of the table.
conn = sqlite3.connect('chat_data.db')
try:
    df.to_sql('chat_data', conn, if_exists='replace', index=False)
finally:
    # Close the connection even if the export raises.
    conn.close()

## Exporting to YAML

In [29]:
import yaml

# Convert the frame into a list of per-row dicts ({column name: value}).
# Despite its name, data_dict is a list holding one dict per row.
data_dict = df.to_dict('records')

In [30]:
# Inspect the converted row records.
data_dict

[{'Date': '2023-01-01',
  'Prompt': 'Explain quantum computing in simple terms',
  'Response Time (ms)': 70,
  'Tokens Generated': 285},
 {'Date': '2023-01-02',
  'Prompt': 'Write a sci-fi short story set in 2050',
  'Response Time (ms)': 93,
  'Tokens Generated': 401},
 {'Date': '2023-01-03',
  'Prompt': 'Generate marketing copy for a new tech product',
  'Response Time (ms)': 78,
  'Tokens Generated': 377},
 {'Date': '2023-01-04',
  'Prompt': 'Generate marketing copy for a new tech product',
  'Response Time (ms)': 120,
  'Tokens Generated': 372},
 {'Date': '2023-01-05',
  'Prompt': 'Create a recipe using plant-based ingredients',
  'Response Time (ms)': 84,
  'Tokens Generated': 316},
 {'Date': '2023-01-06',
  'Prompt': 'Write a sci-fi short story set in 2050',
  'Response Time (ms)': 72,
  'Tokens Generated': 427},
 {'Date': '2023-01-07',
  'Prompt': 'Create a recipe using plant-based ingredients',
  'Response Time (ms)': 74,
  'Tokens Generated': 494},
 {'Date': '2023-01-08',
  'P

In [31]:
# Serialise the row records to a YAML file.
# sort_keys=False keeps each record's keys in DataFrame column order instead of
# PyYAML's default alphabetical ordering; the encoding is set explicitly so the
# file does not depend on the platform's locale default.
with open('chat_data.yaml', 'w', encoding='utf-8') as file:
    yaml.dump(data_dict, file, sort_keys=False)

# Report success only after the with-block has flushed and closed the file.
print('Data exported to chat_data.yaml!')

Data exported to chat_data.yaml!
