In [1]:
import os
import pandas as pd
from sklearn.cluster import KMeans
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report


In [2]:
folder_path = "XRD_csv"
csv_files = [file for file in os.listdir(folder_path) if file.endswith(".csv")]
data = []

In [3]:
for file in csv_files:
    file_path = os.path.join(folder_path, file)
    df = pd.read_csv(file_path)
    data.append(df[['Angle', 'Intensity']])


In [4]:
features = []

for file_data in data:
    # Perform feature extraction for each file's data
    intensity_values = file_data['Intensity']
    mean_intensity = intensity_values.mean()
    std_intensity = intensity_values.std()
    extracted_features = [mean_intensity, std_intensity]
    features.append(extracted_features)

In [6]:
n_clusters = 7  # Number of expected peak types
kmeans = KMeans(n_clusters=n_clusters, random_state=42)
cluster_labels = kmeans.fit_predict(features)



In [7]:
file_labels = []

for label in cluster_labels:
    if label == 0:
        file_labels.append('TypeA')
    elif label == 1:
        file_labels.append('TypeB')
    elif label == 2:
        file_labels.append('TypeC')
    elif label == 3:
        file_labels.append('TypeD')
    elif label == 4:
        file_labels.append('TypeE')
    elif label ==5:
        file_labels.append('TypeF')
    elif label ==6:
        file_labels.append('TypeG')

In [8]:
classifier = RandomForestClassifier()
classifier.fit(features, file_labels)

In [9]:
new_file = 'OPCM-14D 2.csv'  # Example file
new_file_path = os.path.join(folder_path, new_file)
new_data = pd.read_csv(new_file_path)[['Angle', 'Intensity']]
intensity_values = new_data['Intensity']
mean_intensity = intensity_values.mean()
std_intensity = intensity_values.std()
new_features = [mean_intensity, std_intensity]


In [10]:
predicted_label = classifier.predict([new_features])
print("Predicted label for the new file:", predicted_label[0])

Predicted label for the new file: TypeA


In [15]:
classification_probabilities = classifier.predict_proba([new_features])
class_labels = classifier.classes_
probabilities = dict(zip(class_labels, classification_probabilities[0]))
print("Classification probabilities:", probabilities)

Classification probabilities: {'TypeA': 0.0, 'TypeB': 0.09, 'TypeC': 0.35, 'TypeD': 0.1, 'TypeE': 0.04, 'TypeF': 0.05, 'TypeG': 0.37}


In [12]:
new_file = 'D:\ml intern\clg_work\XRD\CC.csv'  # Example file
new_file_path = os.path.join(folder_path, new_file)
new_data2 = pd.read_csv(new_file_path)[['Angle', 'Intensity']]
intensity_values = new_data2['Intensity']
mean_intensity = intensity_values.mean()
std_intensity = intensity_values.std()
new_features = [mean_intensity, std_intensity]

In [13]:
predicted_label = classifier.predict([new_features])
print("Predicted label for the new file:", predicted_label[0])

Predicted label for the new file: TypeG


In [14]:
classification_probabilities = classifier.predict_proba([new_features])
class_labels = classifier.classes_
probabilities = dict(zip(class_labels, classification_probabilities[0]))
print("Classification probabilities:", probabilities)

Classification probabilities: {'TypeA': 0.0, 'TypeB': 0.09, 'TypeC': 0.35, 'TypeD': 0.1, 'TypeE': 0.04, 'TypeF': 0.05, 'TypeG': 0.37}
