In [1]:
import pandas as pd

# Input CSV, given as a relative path (resolved against the current working directory)
file_path = 'drive data.csv'
data = pd.read_csv(filepath_or_buffer=file_path)

# Print the first five rows as a quick check of the parsed columns
print(data.head(n=5))


   Drive ID  Recording ID Start (UTC) End (UTC)  Unnamed: 4 Day/Night  \
0         1             1    14:03:30  14:03:47         NaN       Day   
1         2             1    12:00:29  12:00:54         NaN       Day   
2         3             1    15:15:13  15:16:40         NaN     Night   
3         3             2    15:21:50  15:24:12         NaN     Night   

  Weather Conditions  Temperature[°C]  Humidity[%]  Precipitation[mm]  
0          Clear sky             31.1           40                  0  
1           Overcast             18.9           51                  0  
2      Partly cloudy             -0.5           90                  0  
3      Partly cloudy             -0.5           90                  0  


In [2]:
from transformers import AutoTokenizer, AutoModelForCausalLM

# Checkpoint identifier shared by the tokenizer and the model below.
# "bigscience/bloom-560m" is only an example; the same name is passed to both
# from_pretrained calls so the tokenizer and the weights stay matched.
model_name = "bigscience/bloom-560m"

# Tokenizer first, then the causal language model, both loaded from `model_name`
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)


  from .autonotebook import tqdm as notebook_tqdm
To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development


In [3]:
# Combine columns into descriptive prompts
def create_prompt(row):
    """Build a one-sentence weather description from a single row.

    Args:
        row: Any object indexable by column label (e.g. a pandas Series or a
            dict) that provides the keys 'Day/Night', 'Weather Conditions',
            'Temperature[°C]', 'Humidity[%]' and 'Precipitation[mm]'.

    Returns:
        str: The sentence with each value interpolated using its default
        string formatting.

    Raises:
        KeyError: If ``row`` lacks one of the labels listed above.
    """
    # Look the fields up in the same order they appear in the sentence.
    period = row['Day/Night']
    conditions = row['Weather Conditions']
    temperature = row['Temperature[°C]']
    humidity = row['Humidity[%]']
    precipitation = row['Precipitation[mm]']

    sentence = (
        f"During the {period} time, the weather was {conditions} "
        f"with a temperature of {temperature}°C, "
        f"humidity at {humidity}%, and precipitation of {precipitation} mm."
    )
    return sentence

# Call create_prompt once per row (axis='columns' passes each row to the function)
# and store the resulting sentences in a new 'prompt' column.
data['prompt'] = data.apply(create_prompt, axis='columns')

# Preview the first five prompts
print(data[['prompt']].head(n=5))

                                              prompt
0  During the Day time, the weather was Clear sky...
1  During the Day time, the weather was Overcast ...
2  During the Night time, the weather was Partly ...
3  During the Night time, the weather was Partly ...


In [4]:
# Generate text for each prompt, attach the results to the DataFrame, and
# write the augmented table to a new CSV file.
generated_texts = []
for prompt in data['prompt']:
    # Tokenize one prompt at a time into PyTorch tensors.
    inputs = tokenizer(prompt, return_tensors="pt")

    # Forward the tokenizer's attention mask together with the input ids.
    # Passing only input_ids forces generate() to infer the mask itself, which
    # triggers a warning about possibly unexpected behaviour.
    outputs = model.generate(
        inputs["input_ids"],
        attention_mask=inputs["attention_mask"],
        max_length=100,
        num_return_sequences=1,
    )

    # Exactly one sequence was requested, so decode the first (only) one.
    generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
    generated_texts.append(generated_text)

# Add generated texts back to the DataFrame (one entry per row, in row order)
data['generated_text'] = generated_texts

# Save to a new CSV; index=False keeps the row index out of the file
output_path = 'generated_descriptions.csv'
data.to_csv(output_path, index=False)
print(f"Generated descriptions saved to {output_path}")

Generated descriptions saved to generated_descriptions.csv
