# Fix the predicted Tbreak has big difference with Tbreak presented in the paper

In [1]:
rootDir=r'C:\Users\Sir\OneDrive\Univeristy\HEA Analysis\4K06\Code\Data Folder\Processed Data'
TrainDtCsvDir=rootDir+r'\Train_RevisionWithOriTbreak.csv'
ValDtCsvDir=rootDir+r'Val_Tbreak_Yield.csv'
ValDtCsvDir0=rootDir+r'\Val_Tbreak_Yield_0Added.csv'

In [33]:
import pandas as pd
import numpy as np
from scipy.optimize import differential_evolution
import matplotlib.pyplot as plt
# Load the uploaded CSV file
file_path = TrainDtCsvDir
df = pd.read_csv(file_path)

# Filter rows where the "Diff" column is greater than 15%
df_filtered = df[df['Diff'].str.rstrip('%').astype(float) > 15]

# Prepare the fixed Tbreak values (Tbreak [oC] + 273.15)
df_filtered['Tbreak_fixed'] = df_filtered['Tbreak [oC]'] + 273.15

# Extract the relevant columns (temperatures and yield strengths) for processing
temperatures = df_filtered['Tbreak_fixed'].values
ys_observed = df_filtered['YS at Tbreak'].values

# Define the function to rerun with fixed Tbreak
def calculate_tbreak_and_ys_at_tbreak_fixed(temperatures, ys_observed, Tbreak_fixed):
    temperatures = np.array(temperatures)
    ys_observed = np.array(ys_observed)

    # Check and replace 0 yield strength values with 1 to avoid log issues
    ys_observed[ys_observed == 0] = 1

    Tm = temperatures[-1]  # Assuming the melting temperature is the last temperature in the array

    def calculate_mse_intercept(params):
        Tbreak, c1, b1, c2 = params
        b2 = b1 + (c1 - c2) * (Tbreak / Tm)  # Enforce intersection at Tbreak
        log_ys_model = np.where(
            temperatures <= Tbreak,
            c1 * (temperatures / Tm) + b1,
            c2 * (temperatures / Tm) + b2
        )
        log_ys_observed = np.log(ys_observed)
        mse = np.mean((log_ys_model - log_ys_observed) ** 2)
        return mse
    paraBound=35
    bounds_intercept = [(temperatures.min(), temperatures.max()), (-paraBound, 0), (-paraBound, paraBound), (-paraBound, 0)]
    result_intercept = differential_evolution(calculate_mse_intercept, bounds_intercept)

    # Extract the optimized parameters
    Tbreak_opt_intercept, c1_opt_intercept, b1_opt_intercept, c2_opt_intercept = result_intercept.x
    b2_opt_intercept = b1_opt_intercept + (c1_opt_intercept - c2_opt_intercept) * (Tbreak_opt_intercept / Tm)
    # Calculate b2 with the enforced condition
    if Tbreak_fixed<Tbreak_opt_intercept:
        ys_at_tbreak = np.exp(c1_opt_intercept * (Tbreak_fixed / Tm) + b1_opt_intercept)   
    else:
         ys_at_tbreak = np.exp(c2_opt_intercept * (Tbreak_fixed / Tm) + b2_opt_intercept)   
    return Tbreak_fixed, ys_at_tbreak

# Now rerun the function for each row in df_filtered and store results
results = []
for comp in df_filtered['Comp'].unique():
    # For each composition, gather all the corresponding rows
    comp_rows = df_filtered[df_filtered['Comp'] == comp]
    # Gather temperatures and yields for this composition
    temperatures = comp_rows['Temp'].values  # All temperatures for the composition
    ys_observed = comp_rows['Yield'].values  # All yields for the composition
    Tbreak_fixed = comp_rows['Tbreak_fixed'].iloc[0]  # Use the fixed Tbreak for this composition
    result = calculate_tbreak_and_ys_at_tbreak_fixed(temperatures, ys_observed, Tbreak_fixed)
    for idx in comp_rows.index:
        results.append((idx, result[0],result[1]))




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_filtered['Tbreak_fixed'] = df_filtered['Tbreak [oC]'] + 273.15


In [30]:
results_df = pd.DataFrame(results, columns=['Index','Tbreak_fixed', 'YS_at_Tbreak'])
results_df.set_index('Index', inplace=True)

# Now, update the original DataFrame row by row, using the index
for idx in results_df.index:
    df.loc[idx, 'Tbreak_fixed'] = results_df.loc[idx, 'Tbreak_fixed']
    df.loc[idx, 'YS_at_Tbreak'] = results_df.loc[idx, 'YS_at_Tbreak']


# The original DataFrame `df` is now updated with the new columns.
# You can display or save it to a file as needed
df.head()  # To check the updated DataFrame

Unnamed: 0.5,Unnamed: 0.4,Unnamed: 0.3,Unnamed: 0.2,Unnamed: 0.1,Unnamed: 0,Yield,Comp,Al,Cr,Mn,...,Solvus Temp. [oC],Diff_Melt_Tbreak,Tbreak (K),YS at Tbreak,Diff,Assigned Point,Criteria Satisfied,Temp Range,Tbreak_fixed,YS_at_Tbreak
0,0,0,0,205,205,1600,HfMoTaTiZr,0.0,0.0,0.0,...,2279,1635.32,1339.984597,848.358549,7.65%,6698.132812,3,5,,
1,1,1,1,206,206,1045,HfMoTaTiZr,0.0,0.0,0.0,...,2279,1635.32,1339.984597,848.358549,7.65%,6698.132812,3,5,,
2,2,2,2,207,207,855,HfMoTaTiZr,0.0,0.0,0.0,...,2279,1635.32,1339.984597,848.358549,7.65%,6698.132812,3,5,,
3,3,3,3,208,208,404,HfMoTaTiZr,0.0,0.0,0.0,...,2279,1635.32,1339.984597,848.358549,7.65%,6698.132812,3,5,,
4,4,4,4,209,209,0,HfMoTaTiZr,0.0,0.0,0.0,...,2279,1635.32,1339.984597,848.358549,7.65%,6698.132812,3,5,,


In [32]:
df.to_csv('FixedTbreakWithPP.csv')

In [24]:
df_filtered.index.shape

(88,)