In [33]:
import pandas as pd

# Load the dataset
file_path = r"C:\Users\chawl\Downloads\archive (2)\GlobalTemperatures.csv"
df = pd.read_csv(file_path)

# Display the first few rows of the dataset
print(df.head())


           dt  LandAverageTemperature  LandAverageTemperatureUncertainty  \
0  1750-01-01                   3.034                              3.574   
1  1750-02-01                   3.083                              3.702   
2  1750-03-01                   5.626                              3.076   
3  1750-04-01                   8.490                              2.451   
4  1750-05-01                  11.573                              2.072   

   LandMaxTemperature  LandMaxTemperatureUncertainty  LandMinTemperature  \
0                 NaN                            NaN                 NaN   
1                 NaN                            NaN                 NaN   
2                 NaN                            NaN                 NaN   
3                 NaN                            NaN                 NaN   
4                 NaN                            NaN                 NaN   

   LandMinTemperatureUncertainty  LandAndOceanAverageTemperature  \
0                 

In [35]:
# Convert 'dt' column to datetime format and extract the year
df['Year'] = pd.to_datetime(df['dt']).dt.year

# Now let's focus on the global temperature columns
# Note: 'LandAndOceanAverageTemperature' seems to be the global temperature
global_temp_df = df[['Year', 'LandAndOceanAverageTemperature', 'LandAndOceanAverageTemperatureUncertainty']].copy()

# Display the first few rows of the relevant data
print(global_temp_df.head())


   Year  LandAndOceanAverageTemperature  \
0  1750                             NaN   
1  1750                             NaN   
2  1750                             NaN   
3  1750                             NaN   
4  1750                             NaN   

   LandAndOceanAverageTemperatureUncertainty  
0                                        NaN  
1                                        NaN  
2                                        NaN  
3                                        NaN  
4                                        NaN  


In [36]:
# Drop rows with missing values
global_temp_df = global_temp_df.dropna()

# Display the cleaned data
print(global_temp_df.head())


      Year  LandAndOceanAverageTemperature  \
1200  1850                          12.833   
1201  1850                          13.588   
1202  1850                          14.043   
1203  1850                          14.667   
1204  1850                          15.507   

      LandAndOceanAverageTemperatureUncertainty  
1200                                      0.367  
1201                                      0.414  
1202                                      0.341  
1203                                      0.267  
1204                                      0.249  


In [37]:
# Calculate temperature anomalies (deviation from the mean)
global_temp_df['Temp_Anomaly'] = global_temp_df['LandAndOceanAverageTemperature'] - global_temp_df['LandAndOceanAverageTemperature'].mean()

# Select features for the model
features = global_temp_df[['Year', 'Temp_Anomaly']]
print(features.head())

# Now let's prepare this data for use in a VQC model (as previously outlined)


      Year  Temp_Anomaly
1200  1850     -2.379566
1201  1850     -1.624566
1202  1850     -1.169566
1203  1850     -0.545566
1204  1850      0.294434


In [38]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Scale the features
scaler = StandardScaler()
scaled_features = scaler.fit_transform(features[['Temp_Anomaly']])

# Split the data into training and testing sets
train_features, test_features, train_labels, test_labels = train_test_split(
    scaled_features, features['Year'], test_size=0.2, random_state=42)

# Convert labels to binary for a simple classification task
train_labels = (train_labels > train_labels.mean()).astype(int)
test_labels = (test_labels > test_labels.mean()).astype(int)


In [40]:
from qiskit.circuit.library import ZZFeatureMap, RealAmplitudes
from qiskit_algorithms.optimizers import COBYLA
from qiskit.primitives import Sampler
from qiskit_machine_learning.algorithms.classifiers import VQC

# Rest of your code...
import time

# Define the feature map and ansatz
num_features = train_features.shape[1]
feature_map = ZZFeatureMap(feature_dimension=num_features, reps=1)
ansatz = RealAmplites(num_qubits=num_features, reps=3)

# Define the optimizer
optimizer = COBYLA(maxiter=100)

# Define the sampler
sampler = Sampler()

# Initialize and train the VQC
vqc = VQC(sampler=sampler, feature_map=feature_map, ansatz=ansatz, optimizer=optimizer)

start = time.time()
vqc.fit(train_features, train_labels)
elapsed = time.time() - start

print(f"Training time: {elapsed:.2f} seconds")

# Evaluate the model
predictions = vqc.predict(test_features)

ValueError: The ZZFeatureMap contains 2-local interactions and cannot be defined for less than 2 qubits. You provided 1.

In [41]:
import pandas as pd

# Load the data
df = pd.read_csv(r'C:\Users\chawl\Downloads\archive (2)\GlobalTemperatures.csv')

# Convert the 'dt' column to datetime format and extract the year
df['Year'] = pd.to_datetime(df['dt']).dt.year

# Calculate temperature anomalies (deviation from the mean)
df['Temp_Anomaly'] = df['LandAndOceanAverageTemperature'] - df['LandAndOceanAverageTemperature'].mean()

# Drop rows with missing values
df = df.dropna()

# Select the features and target variable
train_features = df[['Year', 'LandAndOceanAverageTemperature', 'Temp_Anomaly']]
train_labels = (df['LandAndOceanAverageTemperature'] > df['LandAndOceanAverageTemperature'].mean()).astype(int)

print(train_features.head())


      Year  LandAndOceanAverageTemperature  Temp_Anomaly
1200  1850                          12.833     -2.379566
1201  1850                          13.588     -1.624566
1202  1850                          14.043     -1.169566
1203  1850                          14.667     -0.545566
1204  1850                          15.507      0.294434


In [42]:
from sklearn.preprocessing import MinMaxScaler

# Scale the features
scaler = MinMaxScaler(feature_range=(-1, 1))
train_features = scaler.fit_transform(train_features)

print(train_features[:5])


[[-1.         -0.8605919  -0.8605919 ]
 [-1.         -0.56658879 -0.56658879]
 [-1.         -0.3894081  -0.3894081 ]
 [-1.         -0.14641745 -0.14641745]
 [-1.          0.18068536  0.18068536]]


In [46]:
from qiskit.circuit.library import ZZFeatureMap, RealAmplitudes
from qiskit_algorithms.optimizers import COBYLA
from qiskit.primitives import Sampler
from qiskit_machine_learning.algorithms.classifiers import VQC
from sklearn.model_selection import train_test_split
import numpy as np

# Assume 'train_features' and 'train_labels' are already defined
# Define the feature map and ansatz
num_features = train_features.shape[1]
feature_map = ZZFeatureMap(feature_dimension=num_features, reps=1)
ansatz = RealAmplitudes(num_qubits=num_features, reps=3)

# Define the optimizer
optimizer = COBYLA(maxiter=100)

# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(train_features, train_labels, test_size=0.2, random_state=42)

# Convert y_train and y_test to numpy arrays if they are not already
y_train = np.array(y_train)
y_test = np.array(y_test)

# Create the VQC model
vqc = VQC(feature_map=feature_map, ansatz=ansatz, optimizer=optimizer, sampler=Sampler())

# Train the VQC model
vqc.fit(X_train, y_train)

# Evaluate the model
score = vqc.score(X_test, y_test)
print(f"Test accuracy: {score}")


Test accuracy: 0.5964912280701754
